Path: utzoo!utgpu!jarvis.csri.toronto.edu!cs.utexas.edu!swrinde!zaphod.mps.ohio-state.edu!brutus.cs.uiuc.edu!ux1.cso.uiuc.edu!ux1.cso.uiuc.edu!aglew From: aglew@oberon.csg.uiuc.edu (Andy Glew) Newsgroups: alt.sources Subject: Re: number recognition tools Message-ID: Date: 18 Jan 90 23:51:20 GMT References: Sender: news@ux1.cso.uiuc.edu (News) Distribution: comp Organization: University of Illinois, Computer Systems Group Lines: 1761 In-Reply-To: aglew@oberon.csg.uiuc.edu's message of 15 Jan 90 11:53:0 This is a repost of the number recognition tools, with a working shar file, and a bug fix courtesy of Joseph Pepin. This post has compiled and successfully completed its test on both a SUN3 and a DECSTATION 3100. ------ CUT HERE ----------------------------------------------------------- #! /bin/sh # This is a shell archive. Remove anything before this line, then unpack # it by saving it into a file and typing "sh file". To overwrite existing # files, type "sh file -c". You can also feed this as standard input via # unshar, or by typing "sh 'Makefile' <<'END_OF_FILE' XCFLAGS=-g XCC=FP='' /bin/cc Xall: test number.o getnumber Xtests: test Xtest: test.o number.o X ${CC} -g -DTEST -o test test.o number.o -lm Xgetnumber: getnumber.o number.o X ${CC} -g -o getnumber getnumber.o number.o -lm Xnumber.o: number.c X END_OF_FILE if test 237 -ne `wc -c <'Makefile'`; then echo shar: \"'Makefile'\" unpacked with wrong size! fi # end of 'Makefile' fi if test -f 'README' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'README'\" else echo shar: Extracting \"'README'\" \(967 characters\) sed "s/^X//" >'README' <<'END_OF_FILE' X/* X * number X * X * This is a directory for a family of routines that convert X * a string to an integer. 
The intention is to be able to freely X * recognize just about any format integer: X * X * Decimal 1342334 X * Hex 0xAB43 X * Octal 01377 X * Binary 0b100100011 X * Arbitrary Radix rrr#vvvvvvvv X * X * Because people often want to provide a special format over and above X * those that are already provided X * X * Eg. Hex 'ABC'Z X * Decimal 10. X * Ignore _ 100_677_888 X * X * the intent is to define a, possibly parametrized, recognizer function X * for each format, and then to pass a list of desired recognizer functions X * for your specific recognizer. X * X * This is not intended to be fast, only general. X * X * All recognizers are of the form: X * X * success = RECOGNIZER( string, resultptr ) X * int success; /* -1 indicates failure */ X * char *string; X * int *result; X * X */ X X X X X END_OF_FILE if test 967 -ne `wc -c <'README'`; then echo shar: \"'README'\" unpacked with wrong size! fi # end of 'README' fi if test -f 'bug.JDP' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'bug.JDP'\" else echo shar: Extracting \"'bug.JDP'\" \(948 characters\) sed "s/^X//" >'bug.JDP' <<'END_OF_FILE' XReturn-Path: XDate: Thu, 18 Jan 90 15:26:53 EST XFrom: jdp@tarpon.att.com (Joseph Pepin) XTo: aglew@oberon.csg.uiuc.edu XSubject: Re: number recognition tools XNewsgroups: alt.sources XIn-Reply-To: XOrganization: AT&T Bell Laboratories X XTwo serious bugs: X X1) The shar file is corrupt: the END-OF-FILE after each man page Xis preceeded by an "X", causing the next file not to be unshar'ed. XMaybe the original files were not terminated by a NL. X X2) Executing "getnumber -100e-2" dumps core on a 3B2 and a 6386WGS Xrunning SVR3.2. The problem is you don't malloc enough space Xfor the right side of an infix expression: X XLine 218 of number.c: X X right = malloc ... InfixStrLen))); X Xshould be: X X right = malloc ... InfixStrLen))+1); X XI suspect that other systems treat malloc(0) as malloc(1). 
X XPlease fix these bugs and/or post this message: my newsserver Xdiscourages postings. X END_OF_FILE if test 948 -ne `wc -c <'bug.JDP'`; then echo shar: \"'bug.JDP'\" unpacked with wrong size! fi # end of 'bug.JDP' fi if test -f 'debug.h' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'debug.h'\" else echo shar: Extracting \"'debug.h'\" \(4128 characters\) sed "s/^X//" >'debug.h' <<'END_OF_FILE' X#ifndef DEBUG_H X X#define DEBUG_H 1 X X#ifdef DEBUG Xint DEBUG_was_defined; X#endif X X/* X * manual NAME debug.h - Andy Glew's debug header X * X * USAGE #define DEBUG 1 #include "debug.h" ... debugf((fmt,vars...)); X * X * DESCRIPTION X * X * The header file "debug.h" may be found in a directory near you. It X * contains macros to make the production of debugging messages more X * pleasant. X * X * debugf X * X * The most important/useful of these macros is debugf((...)). Debugf X * is a printf (to stdout by default, optionally to stderr or X * elsewhere). It accepts standard printf format strings and a X * variable number of arguments. The only syntactic difference is the X * necessity of double parentheses about the parameter list X * (necessary because C macros can't have variable numbers of X * arguments). Debugf usually produces one line of output per call, X * with a distinctive mark like "Debug in file XXX line NNN". X * X * Example: while( ... ) { debugf(("in loop\n")); ... } X * X * DEBUG X * X * To use debugf: #define DEBUG 1 #include "debug.h" DEBUG must be X * defined before debug.h is included, either in code or in a -dDEBUG X * flag when compiled. If DEBUG is not defined when debug.h is X * included, debugf and other debugging macros occupy no space in X * your program. X * X * nodebugf X * X * nodebugf((...)) is syntactic sugar to make it easy to turn debugfs X * off without having to remove them or go through convolutions X * setting a debug control variable. 
X * X * debugshow X * X * debugshow(var,fmt) produces the quintessential debugging output X * VARIABLE=VALUE_OF_VARIABLE. fmt is the format string you would use X * in printf, without the double quotes. X * X * Example: int Ingrid=77; debugshow(Ingrid,%d); Produces Debug in file X * XXX line NNN: Ingrid = 77 X * X * _debugf X * X * _debugf is the name of the function to be used to print the debugging X * output, printf by default. It can be changed at any time to X * another varargs function. eprintf is useful - just X * fprintf(stderr...) although it must be rewritten as a function due X * to the weaknesses of C. Logging functions, and the like, can also X * be useful. X * X * DebugCondition X * X * There are actually several layers of indirection in this macro X * system: X * X * debugshow -> debugf -> _ifdebugf -> _debugf X * X * _ifdebugf should not be changed; but the condition DebugCondition X * which it tests can usefully be changed. By default DebugCondition X * is defined as (1); it is often nice to set it to a variable that X * you can patch X * X * #define DebugCondition DebugVar int DebugVar = 0; #define DEBUG 1 X * #include "debug.h" X * X * I would have made a variable the default except for awkwardnesses X * some people have about adding modules to the standard C library. X * X * Some people like having multiple debug levels, although I don't. X * These can also be stacked. X * X * The function name __FUNC__ should be printed out as soon as the C X * compiler is fixed. 
X * X * manual X */ X X#if defined(DEBUG) X# define DEBUGcode( sl ) sl X# define DEBUGdecl( decl ) decl X#else X# define DEBUGcode( sl ) X# define DEBUGdecl( decl ) X#endif X X#define noDEBUGcode( sl ) X#define noDEBUGdecl( sl ) X X/* X * double brackets about _debugf's parmlist so that you can do X * #define _debugf(v) (printf v,uprintf v) which is useful in the X * kernel X */ X# if defined(DEBUG) X# if !defined(_debugf) X# define _debugf(parmlist) (printf parmlist) X# endif X X/* DebugCondition can be controlled by the user */ X# define _ifdebugf(parmlist) ( DebugCondition ? _debugf(parmlist) : 0 ) X# if !defined(DebugCondition) X# define DebugCondition (1) X# endif X# endif /* DEBUG */ X X# if defined(DEBUG) X# define debugf(parmlist) \ X ( _ifdebugf(("Debug in file %s line %d ",__FILE__,__LINE__)), \ X _ifdebugf(parmlist) \ X ) X# else X# define debugf(parmlist) X# endif X# define nodebugf(parmlist) X X /* debugshow - cannot use "s in arguments */ X#ifdef DEBUG X# define debugshow(var,fmt) debugf(("var = fmt\n",var)) X# define nodebugshow(var,fmt) X#else X# define debugshow(var,fmt) X# define nodebugshow(var,fmt) X#endif X X X#endif /* DEBUG_H */ X END_OF_FILE if test 4128 -ne `wc -c <'debug.h'`; then echo shar: \"'debug.h'\" unpacked with wrong size! 
fi # end of 'debug.h' fi if test -f 'getnumber.c' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'getnumber.c'\" else echo shar: Extracting \"'getnumber.c'\" \(899 characters\) sed "s/^X//" >'getnumber.c' <<'END_OF_FILE' X/* User level wrapper for Dgetnumber */ X Xvoid exit(); X Xint Igetnumber(); Xint Dgetnumber(); X Xint DorI = 'D'; X Xchar *format = "%g"; X Xmain(argc,argv) X int argc; X char **argv; X{ X double dres; X int ires; X X for(;*++argv;) { X if( !strcmp(*argv,"-i") ) { X DorI = 'I'; X format = "%d"; X } X else if( !strcmp(*argv,"-d") ) { X DorI = 'D'; X format = "%g"; X } X else if( !strcmp(*argv,"-format") ) { X format = *++argv; X } X else { X switch( DorI ) { X default: X exit(-1); X case 'D': X if( Dgetnumber(*argv,&dres) == -1 ) { X (void)printf("invalid\n"); X } X else { X (void)printf(format,dres); X (void)printf("\n"); X } X break; X case 'I': X if( Igetnumber(*argv,&ires) == -1 ) { X (void)printf("invalid\n"); X } X else { X (void)printf(format,ires); X (void)printf("\n"); X } X break; X } X } X } X exit(0); X /*NOTREACHED*/ X} X X END_OF_FILE if test 899 -ne `wc -c <'getnumber.c'`; then echo shar: \"'getnumber.c'\" unpacked with wrong size! fi # end of 'getnumber.c' fi if test -f 'getnumber.man' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'getnumber.man'\" else echo shar: Extracting \"'getnumber.man'\" \(2080 characters\) sed "s/^X//" >'getnumber.man' <<'END_OF_FILE' X.nf X NAME: X getnumber \- read a number in arbitrary notation X X SYNOPSIS: X getnumber [-i|-d|-format '%printf-format'] number-string ... X X DESCRIPTION: X getnumber is a program wrapped around the Dgetnumber and Igetnumber X family of routines (see their man pages). Getnumber converts number X in an almost arbitrary string representation, and prints the number X out on stdout as a decimal integer or double precision value, X or using a printf like string. 
X X getnumber is intended to be used in shell scripts that would like X to be able to recognize numbers typed in by the user in their natural X format. Eg. X ARG=0.5M-1 X ... X value=`getnumber -i $ARG` X XINTERFACE: X Getnumber processes its command line arguments and prints to stdout. X Stdin is not used. X X The command line option X X -i -- convert to an integer, using %d format to print X -d -- convert to a C double, using %g format to print X X -format %printf-format X -- format to be used in printing the result. X X Exits with error status -1 on a command line error. X Prints "invalid" on conversion errors, but continues to process. X XBACKGROUND: X See the man pages for the Dgetnumber and Igetnumber family X for more details (man number). X X The intention is to be able to freely recognize just about any X format number: X X Decimal 1342334 X Hex 0xAB43 X Octal 01377 X Binary 0b100100011 X Arbitrary Radix rrr#vvvvvvvv X H:M:S 1:20:33 X Real 1.45 X "Meg" 4M X Expressions (4M-1)*2 X Exponential 1.2E6 X X Expressions currently include: X infix binary: | ^ & << >> + - * / % **(exponent) X prefix unary: - + ~ X midfix grouping: () [] {} X and it is similarly easy to add new notations. X X All number representations and expressions can be intermixed: X [(2M-1)*4]>>0x03 X X NOTES: X X AUTHOR: X Andy Glew (aglew@uiuc.edu) X X HISTORY: X Originally written by Andy Glew at McGill University, 1983 X X BUGS: X END_OF_FILE if test 2080 -ne `wc -c <'getnumber.man'`; then echo shar: \"'getnumber.man'\" unpacked with wrong size! fi # end of 'getnumber.man' fi if test -f 'number.c' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'number.c'\" else echo shar: Extracting \"'number.c'\" \(19062 characters\) sed "s/^X//" >'number.c' <<'END_OF_FILE' X/* X * number X * X * This is a directory for a family of routines that convert X * a string to a number.
X * X * X * The intention is to be able to freely recognize just about any X * format number: X * X * Decimal 1342334 X * Hex 0xAB43 X * Octal 01377 X * Binary 0b100100011 X * Arbitrary Radix rrr#vvvvvvvv X * H:M:S 1:20:33 X * Real 1.45 X * "Meg" 4M X * Expressions (4M-1)*2 X * Exponential 1.2E6 X * X * Because people often want to provide a special format over and above X * those that are already provided X * X * Eg. Hex 'ABC'Z X * Decimal 10. X * Ignore _ 100_677_888 X * X * the intent is to define a, possibly parametrized, recognizer function X * for each format, and then to pass a list of desired recognizer functions X * for your specific recognizer. X * X * This is not intended to be fast, only general. X * X * All recognizers are of the form: X * X * success = RECOGNIZER( string, resultptr ) X * int success; { -1 indicates failure } X * char *string; X * double *result; X * X * Recognition is done bottom up instead of top-down; X * instead of having a grammar that constrains notation, X * everything is passed to low-level recognizers that try to recognize X * the string, perhaps recursively, passing off to other recognizers X * in case of failure. X * X * The current recognizers are: X * X * Octal0 0 eg. 0377 = 0xFF X * SimpleDecimalString eg. 10 = 0xA X * Binary0b 0b eg. 0b011 = 3 X * Decimal0d 0d eg. 0d10 = 0xA X * Hex0x 0x eg. 0xA = 10 X * ArbitraryRadix # eg. 3#22 = 8 X * colon60 M:S eg. 1:20 = 80 X * colon60colon60 H:M:S eg. 2:1:20 = 7280 X * PowersOf2 [KMG] eg. 0.5K = 512 X * RealDecimalString eg. 0.5 X * Expressions eg. 0.5M-1 X * X * Expressions currently include: X * infix binary: | ^ & << >> + - * / % **(exponent) X * prefix unary: - + ~ X * midfix grouping: () [] {} X * and it is similarly easy to add new notations. X * X * All number representations and expressions can be intermixed: X * [(2M-1)*4]>>0x03 X * X * There are some functions useful in building other recognizers, like X * RadixString(), and the expression building functions.
X * X * There are two top level recognizers, X * Dgetnumber(str,res) X * and Igetnumber(str,res); X * the "I" version is basically a call to the "D" version, which rounds, X * and errors if the rounded integer value is more than int_threshold X * away from the non-int value. X * X * Initially, this was integer only, but in Jan 89 I changed it X * to produce a floating point result - if you want integer, just X * integerize. X * This will have some lossage if your floating point format X * cannot represent all integer values exactly. Sorry - in that X * case, you'll just have to go back to the old routine. X * It has the advantage of one family of routines being able X * to handle intermediate cases - like 0.5M. X * It has the advantage of, on a system with decent floating X * point, being able to trap on overflow or underflow. X * Interim: add this for Motorola systems. X * If you can, use your system dependent way of trapping on inexact. X * Interim: if you have IEEE floating point, it would be nice to X * have this same routine read in NaNs. X */ X X#include X#include X#include X Xextern double pow(); Xextern double floor(); Xextern double fabs(); Xextern char *malloc(); Xextern void free(); X X#define DEBUG 1 X#include "debug.h" X X#define index strchr Xchar *index(); X#define rindex strrchr Xchar *rindex(); X Xvoid exit(); X X/* Interim: no tree building. all recursive in place. 
*/ X Xint MidfixExpression(str,res,preStr,postStr,MidfixFunc) X char *str; X double *res; X char *preStr, *postStr; X double (*MidfixFunc)(); X{ X char *midStr; X double midVal; X int strLen = strlen(str); X int preStrLen = strlen(preStr); X int postStrLen = strlen(postStr); X int match; X X nodebugf(("In midfix <%s> <%s> <%s>\n", str, preStr, postStr)); X X if( str == 0 || *str == 0) { X return -1; X } X X if( strncmp(str,preStr,preStrLen) != 0 ) { X return -1; X } X if( strcmp(str+strLen-postStrLen, postStr) != 0 ) { X return -1; X } X X midStr = malloc((unsigned)(strLen-preStrLen-postStrLen+1)); X if( !midStr ) { X (void)fprintf(stderr,"Error malloc'ing memory for expressions\n"); X exit(1); X } X (void)strncpy(midStr,str+preStrLen,strLen-preStrLen-postStrLen); X midStr[strLen-preStrLen-postStrLen] = '\0'; X nodebugf(("midStr=<%s>\n",midStr)); X match = (Dgetnumber(midStr,&midVal) != -1); X free(midStr); X if( !match ) { X return -1; X } X *res = MidfixFunc(midVal); X return 0; X} Xdouble Dnop(a) double a; { return a; } X X Xint PrefixExpression(str,res,PrefixStr,PrefixFunc) X char *str; X double *res; X char *PrefixStr; X double (*PrefixFunc)(); X{ X double Darg; X int PrefixStrLen = strlen(PrefixStr); X X if( str == 0 || *str == 0) { X return -1; X } X X if( strncmp(str,PrefixStr,PrefixStrLen) != 0 ) { X return -1; X } X else { X if( Dgetnumber(str+PrefixStrLen,&Darg) == -1 ) { X return -1; X } X *res = PrefixFunc(Darg); X return 0; X } X} Xdouble Dplussign(a) double a; { return a; } Xdouble Dnegsign(a) double a; { return -a; } Xdouble Dinvert(a) double a; { return (double)~(unsigned)a; } X X Xint InfixExpression(str,res,InfixStr,InfixFunc) X char *str; X double *res; X char *InfixStr; X double (*InfixFunc)(); X{ X char *left, *right; X double Dleft, Dright; X char *p1; X int match; X int InfixStrLen = strlen(InfixStr); X X if( str == 0 || *str == 0) { X return -1; X } X X /* Try splitting into subexpressions at all occurrences of the operator, X moving from 
left to handle 1-1-1. Retries will handle subexpressions. X */ X for( p1 = str + strlen(str);;p1--) { X if( p1 < str ) { X return -1; X } X if( !strncmp(p1,InfixStr,InfixStrLen) ) { X left = malloc((unsigned)(p1-str+1)); X right = malloc((unsigned)(strlen(p1+InfixStrLen))+1); X /* Bug JDP1: found by jdp@tarpon.att.com, X Joseph Pepin - did not malloc enough space for RHS, X missing +1 above. -100e-2 test case. */ X if( !left || !right ) { X (void)fprintf(stderr,"Error malloc'ing memory for expressions\n"); X exit(1); X } X (void)strncpy(left,str,p1-str); X left[p1-str] = '\0'; X (void)strcpy(right, p1+InfixStrLen ); X match = Dgetnumber(left,&Dleft) != -1 X && Dgetnumber(right,&Dright) != -1; X free(left); X free(right); X if( match ) { X *res = InfixFunc(Dleft,Dright); X return 0; X } X } X } X} X Xdouble Dplus(a,b) double a,b; { return a+b; } Xdouble Dsub(a,b) double a,b; { return a-b; } Xdouble Dtimes(a,b) double a,b; { return a*b; } Xdouble Ddivide(a,b) double a,b; { return a/b; } X/* Interim: need to indicate loss of info on these "pesudo-integer" ops */ Xdouble Dor(a,b) double a,b; { return (double)((unsigned)a | (unsigned)b); } Xdouble Dand(a,b) double a,b; { return (double)((unsigned)a & (unsigned)b); } Xdouble Dxor(a,b) double a,b; { return (double)((unsigned)a ^ (unsigned)b); } Xdouble Dremainder(a,b) double a,b; { return (double)((unsigned)a % (unsigned)b); } Xdouble Dlsh(a,b) double a,b; { return (double)((unsigned)a << (unsigned)b); } Xdouble Drsh(a,b) double a,b; { return (double)((unsigned)a >> (unsigned)b); } Xdouble Dexponent(a,b) double a,b; { extern double pow(); return pow(a,b); } X X Xint Expression(str,res) X char *str; X double *res; X{ X if( str == 0 || *str == 0) { X return -1; X } X X if( InfixExpression(str,res,"|",Dor) == 0 ) { X return 0; X } X if( InfixExpression(str,res,"^",Dxor) == 0 ) { X return 0; X } X if( InfixExpression(str,res,"&",Dand) == 0 ) { X return 0; X } X if( InfixExpression(str,res,"<<",Dlsh) == 0 ) { X return 0; X } X 
if( InfixExpression(str,res,">>",Drsh) == 0 ) { X return 0; X } X if( InfixExpression(str,res,"+",Dplus) == 0 ) { X return 0; X } X if( InfixExpression(str,res,"-",Dsub) == 0 ) { X return 0; X } X if( InfixExpression(str,res,"*",Dtimes) == 0 ) { X return 0; X } X if( InfixExpression(str,res,"/",Ddivide) == 0 ) { X return 0; X } X if( InfixExpression(str,res,"%",Dremainder) == 0 ) { X return 0; X } X if( InfixExpression(str,res,"**",Dexponent) == 0 ) { X return 0; X } X if( PrefixExpression(str,res,"+",Dplussign) == 0 ) { X return 0; X } X if( PrefixExpression(str,res,"-",Dnegsign) == 0 ) { X return 0; X } X if( PrefixExpression(str,res,"~",Dinvert) == 0 ) { X return 0; X } X if( MidfixExpression(str,res,"(",")",Dnop) == 0 ) { X return 0; X } X if( MidfixExpression(str,res,"[","]",Dnop) == 0 ) { X return 0; X } X if( MidfixExpression(str,res,"{","}",Dnop) == 0 ) { X return 0; X } X return -1; X} X X Xint SimpleDecimalString(str,res) X char *str; X double *res; X{ X if( str == 0 || *str == 0) { X return -1; X } X X return RadixString(str,10,res); X} X Xint Binary0b(str,res) X char *str; X double *res; X{ X if( str == 0 || *str == 0) { X return -1; X } X X if( str[0] == '0' && (str[1] == 'b' || str[1] == 'B') ) { X return RadixString(str+2,2,res); X } X return -1; X} X Xint Octal0(str,res) X char *str; X double *res; X{ X if( str == 0 || *str == 0) { X return -1; X } X X if( str[0] == '0' ) { X return RadixString(str+1,8,res); X } X return -1; X} X Xint Decimal0d(str,res) X char *str; X double *res; X{ X if( str == 0 || *str == 0) { X return -1; X } X X if( str[0] == '0' && (str[1] == 'd' || str[1] == 'D') ) { X return RadixString(str+2,10,res); X } X return -1; X} X Xint Hex0x(str,res) X char *str; X double *res; X{ X if( str == 0 || *str == 0) { X return -1; X } X X if( str[0] == '0' && (str[1] == 'x' || str[1] == 'X') ) { X return RadixString(str+2,16,res); X } X return -1; X} X Xint ArbitraryRadix(str,res) X char *str; X double *res; X{ X int radix; X int digval; 
X X if( str == 0 || *str == 0) { X return -1; X } X X for( radix = 0; (digval = DECIMAL_DIGIT_VALUE(*str)) != -1; str++ ) { X radix = radix*10 + digval; X } X if( *str != '#' ) X return -1; X return RadixString(str+1,radix,res); X} X Xint RadixString(str,radix,res) X char *str; X int radix; X double *res; X{ X int val; X int digval; X X if( str == 0 || *str == 0) { X return -1; X } X X val = 0; X for(;;) { X if( !IGNORE(*str) ) { X digval = DIGIT_VALUE(*str); X if( digval < 0 || digval >= radix ) X return -1; X val = val*radix + digval; X } X str++; X if( *str == '\0' ) X break; X } X *res = val; X X return 0; X} X X/* Recognisers for M:S and H:M:S forms */ X X#define PRECOLON() { \ X if( (colonpos = index(str,':')) == 0 ) { \ X return -1; \ X } \ X precolon_len = colonpos-str; \ X if( precolon_len > 128-1 ) { \ X /* string too long */ \ X return -1; \ X } \ X /* Copy the precolon string and interpret as a decimal number */ \ X (void)strncpy( precolon_str, str, precolon_len ); \ X precolon_str[precolon_len] = '\0'; \ X \ X if( SimpleDecimalString(precolon_str,&precolon_val) == -1 ) { \ X return -1; \ X } \ X if( precolon_val != (int) precolon_val || precolon_val < 0 ) { \ X return -1; \ X } \ X} X Xint colon60(str,res) X char *str; X double *res; X{ X char *colonpos; X char precolon_str[128]; X int precolon_len; X double precolon_val; X double postcolon_val; X X X if( str == 0 || *str == 0) { X return -1; X } X X PRECOLON(); X X if( DecimalString(colonpos+1,&postcolon_val) == -1 ) { X return -1; X } X if( postcolon_val < 0 || 60 <= postcolon_val ) { X return -1; X } X X *res = precolon_val*60 + postcolon_val; X return 0; X} Xint colon60colon60(str,res) X char *str; X double *res; X{ X char *colonpos; X char precolon_str[128]; X int precolon_len; X double precolon_val; X double postcolon_val; X X if( str == 0 || *str == 0) { X return -1; X } X X PRECOLON(); X X if( colon60(colonpos+1,&postcolon_val) == -1 ) { X return -1; X } X if( postcolon_val < 0 || 60*60 <= 
postcolon_val) { X return -1; X } X X *res = precolon_val*(60*60) + postcolon_val; X return 0; X} X X/* Recognize common powers of 2: 2^10=K, 2^20=M, 2^30=G X Interim: would handle 2^40=T if it didn't overflow */ X Xint PowersOf2(str,res) X char *str; X double *res; X{ X int sl = strlen(str); X double val; X char buf[128]; X X if( str == 0 || *str == 0) { X return -1; X } X X if( sl <= 0 || sl > 128-1 ) { X return -1; X } X (void)strcpy(buf,str); X buf[sl-1] = '\0'; X X if( str[sl-1] == 'K' ) { X if( DecimalString(buf,&val) == -1 ) { X return -1; X } X *res = val * 1024; X return 0; X } X else if( str[sl-1] == 'M' ) { X if( DecimalString(buf,&val) == -1 ) { X return -1; X } X *res = val * 1024*1024; X return 0; X } X else if( str[sl-1] == 'G' ) { X if( DecimalString(buf,&val) == -1 ) { X return -1; X } X *res = val * 1024*1024*1024; X return 0; X } X return -1; X} X X/* Floating point recognizer. X Interim: decimal floating point only. X iii.fffff form only. X Interim: should recognize scientific notation iii.fffEeeee */ X Xint RealDecimalString(str,res) X char *str; X double *res; X{ X double val; X int sl; X int infraction; X int scale; X X if( str == 0 || *str == 0) { X return -1; X } X X sl = strlen(str); X X if( sl <= 0 ) { X return -1; X } X X if( index(str,'.') == 0 ) { X return -1; X } X X /* accumulation of value down from least significant end X first, to reduce errors */ X val = 0; X infraction = 1; X scale = 1; X for(;sl-->0;) { X int d = DECIMAL_DIGIT_VALUE(str[sl]); X if( d == -1 ) { X if( str[sl] == '.' 
) { X infraction = 0; X continue; X } X else { X return -1; X } X } X if( infraction ) { X val += d; X val /= 10.0; X } X else { X val = d*scale + val; X scale = scale*10; X } X } X *res = val; X return 0; X} X X X X Xint DecimalString(str,res) X char *str; X double *res; X{ X if( str == 0 || *str == 0) { X return -1; X } X X if( SimpleDecimalString(str,res) == -1 X && RealDecimalString(str,res) == -1 ) { X return -1; X } X return 0; X} X X X/* Generic routine for signs */ Xint Signed(func,str,res) X int (*func) (); X char *str; X double *res; X{ X double value; X int ret; X int neg; X X if( str == 0 || *str == 0) { X return -1; X } X X switch( str[0] ) { X case '+': X neg = 0; X ret = func(str+1,&value); X break; X case '-': X neg = 1; X ret = func(str+1,&value); X break; X default: X neg = 0; X ret = func(str,&value); X break; X } X if( ret == -1 ) { X return -1; X } X else { X if( neg ) value = -value; X *res = value; X return 0; X } X X} X X X X/* Scientific notation recognizer */ X Xint ScientificExponentialNotation(str,res) X char *str; X double *res; X{ X char *dupptr; X char *expptr; X double exponent; X double mantissa; X int retval; X extern char *strdup(); X X if( str == 0 || *str == 0) { X return -1; X } X X if( (dupptr = strdup(str)) == 0 ) { X (void)fprintf(stderr,"insufficient free memory to duplicate string\n"); X exit(1); X } X if( (expptr = strchr(dupptr,'E')) == 0 X && (expptr = strchr(dupptr,'e')) == 0 X ) { X retval = -1; X goto cleanup; X } X X *expptr++ = '\0'; X X /* dupptr is now a pointer to the mantissa, expptr to the exponent, X both null terminated strings */ X if( Signed(SimpleDecimalString,expptr,&exponent) == -1 ) { X retval = -1; X goto cleanup; X } X X if( Signed(RealDecimalString,dupptr,&mantissa) == -1 X && Signed(SimpleDecimalString,dupptr,&mantissa) == -1 ) { X retval = -1; X goto cleanup; X } X X *res = mantissa * pow(10.0,exponent); X retval = 0; X X cleanup: X free(dupptr); X X return retval; X} X X X/* Utility functions */ 
Xchar *IGNORE_CHARS = "_"; X Xint IGNORE(c) X char c; X{ X return index(IGNORE_CHARS,c) != 0; X} X Xint DIGIT_VALUE(d) X char d; X{ X int val; X X switch( d ) { X default: val = -1; break; X case '0': val = 0; break; X case '1': val = 1; break; X case '2': val = 2; break; X case '3': val = 3; break; X case '4': val = 4; break; X case '5': val = 5; break; X case '6': val = 6; break; X case '7': val = 7; break; X case '8': val = 8; break; X case '9': val = 9; break; X case 'a': case 'A': val = 0xA; break; X case 'b': case 'B': val = 0xB; break; X case 'c': case 'C': val = 0xC; break; X case 'd': case 'D': val = 0xD; break; X case 'e': case 'E': val = 0xE; break; X case 'f': case 'F': val = 0xF; break; X } X return val; X} X Xint DECIMAL_DIGIT_VALUE(d) X char d; X{ X int val = DIGIT_VALUE(d); X if( val < 0 || val >= 10 ) X return -1; X else X return val; X} X X X X Xtypedef int (*Recognizer)(); X XRecognizer DefaultRecognizers[] = { X Octal0, X SimpleDecimalString, X Binary0b, X Decimal0d, X Hex0x, X ArbitraryRadix, X colon60, X colon60colon60, X PowersOf2, X RealDecimalString, X Expression, X ScientificExponentialNotation, X 0 X}; X X/* Flags for DgetnumberList */ X#define PRIO_RESOLVE_AMBIGUITY 1 X Xint DgetnumberList(str,res,flags,RecognizerList) X char *str; X double *res; X int flags; X Recognizer RecognizerList[]; X{ X int found; X double oldval = 0; /* to silence lint "oldval may be used before set" */ X double newval; X Recognizer *fp; X X /* Special test for null strings. X All recognizers should really handle this, but... 
X */ X if( str == 0 || str[0] == 0 ) { X return -1; X } X X for(fp = RecognizerList, found=0; *fp; fp++) { X if( (*fp)(str,&newval) != -1 ) { X if( flags & PRIO_RESOLVE_AMBIGUITY ) { X found = 1; X break; X } X if( found ) { X if( newval != oldval ) { X return -1; X } X } X else { X oldval = newval; X found = 1; X } X } X } X if( found ) { X *res = newval; X return 0; X } X else { X return -1; X } X} X X X/* Recognizer form of the above, with the default list */ X Xint Dgetnumber(str,res) X char *str; X double *res; X{ X if( DgetnumberList(str,res, X PRIO_RESOLVE_AMBIGUITY, X DefaultRecognizers) X == -1 ) { X return -1; X } X return 0; X} X X X/* Integer version of the above. X Includes a threshold because arithmetic may be inexact (sigh) */ X/* Interim: should this be parametrized for "D" function to call, X threshold, and list of recognizers? I'm not sure. */ X Xdouble int_threshold = 0.00000001; X Xint Igetnumber(str,res) X char *str; X int *res; X{ X extern double floor(); X extern double fabs(); X double dres; X double delta; X X if( Dgetnumber(str,&dres) == -1 ) { X return -1; X } X *res = (int)floor(dres+0.5); X delta = (double)*res - dres; X if( fabs(delta) > int_threshold ) { X return -1; X } X return 0; X} X X X X/* interim: strdup for BSD. remove if you already have it */ Xchar *strdup(s) X char *s; X{ X char *mp = malloc(strlen(s)+1); X if( mp == 0 ) { X (void)fprintf(stderr,"Error malloc'ing in strdup\n"); X exit(1); X } X (void)strcpy(mp,s); X return mp; X} X END_OF_FILE if test 19062 -ne `wc -c <'number.c'`; then echo shar: \"'number.c'\" unpacked with wrong size! 
fi # end of 'number.c' fi if test -f 'number.man' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'number.man'\" else echo shar: Extracting \"'number.man'\" \(5263 characters\) sed "s/^X//" >'number.man' <<'END_OF_FILE' X.nf X NAME: X Dgetnumber, Igetnumber \- a family of string to number conversion routines X X SYNOPSIS: X X /* Default double precision recognizer */ X success = Dgetnumber( string, resultptr ) X int success; { -1 indicates failure } X char *string; X double *result; X X /* Default integer recognizer */ X success = Igetnumber( string, resultptr ) X int success; { -1 indicates failure } X char *string; X int *result; X X /* Threshold used for integerizing double precision values. */ X double int_threshold; X X DESCRIPTION: X X Dgetnumber and Igetnumber are two representatives (probably all that a typical X use may ever encounter) of a family of routines for string representations of X numbers to numbers in machine internal representation. X X They were written out of frustration with programs and routines that seldom X accept all of the "natural" representations of numbers for a problem -- X disk utilities that require decimal numbers as input, while disk error loggers X produce hex numbers on output, times that need to be converted from H:M:S X before they can be used, etc. X X The intention is to be able to freely recognize just about any X format number: X X Decimal 1342334 X Hex 0xAB43 X Octal 01377 X Binary 0b100100011 X Arbitrary Radix rrr#vvvvvvvv X H:M:S 1:20:33 X Real 1.45 X "Meg" 4M X Expressions (4M-1)*2 X Exponential 1.2E6 X X Because people often want to provide a special format over and above X those that are already provided X X Eg. Hex 'ABC'Z X Decimal 10. X Ignore _ 100_677_888 X X the intent is to define a, possibly parametrized, recognizer function X for each format, and then to pass a list of desired recognizer functions X for your specific recognizer. X X This is not intended to be fast, only general. 
X X All recognizers are of the form: X X success = RECOGNIZER( string, resultptr ) X int success; { -1 indicates failure } X char *string; X double *resultptr; X X Recognition is done bottom-up instead of top-down; X instead of having a grammar that constrains notation, X everything is passed to low-level recognizers that try to recognize X the string, perhaps recursively, passing off to other recognizers X in case of failure. X X The current recognizers are: X X Octal0 0 eg. 0377 = 0xFF X SimpleDecimalString eg. 10 = 0xA X Binary0b 0b eg. 0b011 = 3 X Decimal0d 0d eg. 0d10 = 0xA X Hex0x 0x eg. 0xA = 10 X ArbitraryRadix # eg. 3#22 = 8 X colon60 M:S eg. 1:20 = 80 X colon60colon60 H:M:S eg. 2:1:20 = 7280 X PowersOf2 [KMG] eg. 0.5K = 512 X RealDecimalString eg. 0.5 X Expressions eg. 0.5M-1 X X Expressions currently include: X infix binary: | ^ & << >> + - * / % **(exponent) X prefix unary: - + ~ X midfix grouping: () [] {} X and it is similarly easy to add new notations. X X All number representations and expressions can be intermixed: X [(2M-1)*4]>>0x03 X X There are some functions useful in building other recognizers, like X RadixString(), and the expression building functions. X X There are two top level recognizers, X Dgetnumber(str,res) X and Igetnumber(str,res); X the "I" version basically calls the "D" version, rounds the result, X and fails if the rounded integer value is more than int_threshold X away from the non-int value. X X These use an internal function X X typedef int (*Recognizer)(); X X int DgetnumberList(str,res,flags,RecognizerList) X char *str; X double *res; X int flags; X Recognizer RecognizerList[]; X X which is called by default with X X Recognizer DefaultRecognizers[] X X An easy way for users to customize these routines is to X create a private list of recognizers, deleting standard recognizers X that are undesired, and adding user coded recognizers that have X not been provided (eg. 
nnCnnTnnB cylinder/track/block notation) X and then call DgetnumberList() from their own top-level wrapper. X X (Internal detail: a flag controls whether conflicting matches X should be an error or not). X X NOTES: X Initially, this was integer only, but in Jan 89 I changed it X to produce a floating point result - if you want integer, just X integerize. X This will have some lossage if your floating point format X cannot represent all integer values exactly. Sorry - in that X case, you'll just have to go back to the old routine. X It has the advantage of one family of routines being able X to handle intermediate cases - like 0.5M. X It has the advantage of, on a system with decent floating X point, being able to trap on overflow or underflow. X But this is not added. X If you can, use your system dependent way of trapping on inexact. X Interim: if you have IEEE floating point, it would be nice to X have this same routine read in NaNs. X X AUTHOR: X Andy Glew (aglew@uiuc.edu) X X HISTORY: X Originally written by Andy Glew at McGill University, 1983 X X BUGS: X END_OF_FILE if test 5263 -ne `wc -c <'number.man'`; then echo shar: \"'number.man'\" unpacked with wrong size! 
fi # end of 'number.man' fi if test -f 'test.c' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'test.c'\" else echo shar: Extracting \"'test.c'\" \(4677 characters\) sed "s/^X//" >'test.c' <<'END_OF_FILE' Xvoid exit(); X Xint verbose = 1; X Xint StopOnError = 1; X Xmain(argc,argv) X int argc; X char **argv; X{ X int aval; X int gval; X int gret; X X if( argc == 1 ) { X AutoTests(); X } X else for(;*++argv;) { X if(0) ; X else if( !strcmp(*argv,"-auto") ) { X AutoTests(); X } X else if( !strcmp(*argv,"-verbose") ) { X verbose = 1; X } X else if( !strcmp(*argv,"-noverbose") ) { X verbose = 0; X } X else if( !strcmp(*argv,"-stoponerror") ) { X StopOnError = 1; X } X else if( !strcmp(*argv,"-nostoponerror") ) { X StopOnError = 0; X } X else if( !strcmp(*argv,"-test") ) { X gret = Igetnumber(argv[1],&gval); X X if( !strcmp(argv[2],"invalid") ) { X if( gret == -1 ) X exit(0); X else { X (void)printf("<%s> <%s> failed - invalid\n",argv[1],argv[2]); X exit(1); X } X } X else { X aval = atoi(argv[2]); X if( gret == -1 ) { X (void)printf("<%s> invalid - should be <%s> %d\n",argv[1],argv[2],aval); X exit(1); X } X else { X if( aval != gval ) { X (void)printf("<%s> %d should be <%s> %d\n", X argv[1],gval,argv[2],aval); X exit(1); X } X else X exit(0); X } X } X } X else { X (void)printf("Unknown argument <%s>\n",*argv); X exit(1); X } X } X exit(0); X /*NOTREACHED*/ X} X Xstruct TestVec { X char *str; X int value; X int valid; X} TV[] = { X { "-100e-2", -1, 1 }, /* Bug JDP1: X found by jdp@tarpon.att.com, X Joseph Pepin - did not malloc X enough space for RHS X */ X { "-111E0A", 0, 0 }, X { "-111E0", -111, 1 }, X { "+121E0", 121, 1 }, X { "100E-2", 1, 1 }, X { "-100E-2", -1, 1 }, X { "+100E-2", 1, 1 }, X { "1E+1", 10, 1 }, X { "1.2E1", 12, 1 }, X { "0.5E6", 500000, 1 }, X { "-111e0A", 0, 0 }, X { "-111e0", -111, 1 }, X { "+121e0", 121, 1 }, X { "100e-2", 1, 1 }, X { "-100e-2", -1, 1 }, X { "+100e-2", 1, 1 }, X { "1e+1", 10, 1 }, X { "1.2e1", 12, 1 }, X { 
"0.5e6", 500000, 1 }, X { "2**4+1", 17, 1 }, X { "(0xFF>>2)+1", 64, 1 }, X { "(1<<2)+1", 5, 1 }, X { "1<<2+1", 8, 1 }, X /* replicated because of an old stateful error */ X { "-(-(-(-(-(-3)))))", 3, 1 }, X { "-(-(-(-(-(-3)))))", 3, 1 }, X { "-(-(-(-(-(-3)))))", 3, 1 }, X { "[1+(2*3)]*{1+2}", 21, 1 }, X { "1+(2+3)", 6, 1 }, X { "1-1-1", -1, 1 }, X { "(0)", 0, 1 }, X { "(1K)+1", 1025, 1 }, X { "2*(1K)+1", 2049, 1 }, X { "2*(2K+1)+(2M/1K)", 6146, 1 }, X { "(0)", 0, 1 }, X { "(0)", 0, 1 }, X { "0b0000", 0, 1 }, X { "0b11", 3, 1 }, X { "-0b101", -5, 1 }, X { "-5K", -5120, 1 }, X { "0.5K", 512, 1 }, X { "13M", 13631488, 1 }, X { "1G", 1073741824, 1 }, X { "1:20", 80, 1 }, X { "2:1:20", 7280, 1 }, X { "I-0b101", 0, 0 }, X { "10.3", 0, 0 }, X { "x0", 0, 0 }, X { "0x", 0, 0 }, X { "-1-", 0, 0 }, X { "-1+", 0, 0 }, X { "-", 0, 0 }, X { "+", 0, 0 }, X { "-4+1", -3, 1 }, X { "-4*-3", 12, 1 }, X { "4*-3", -12, 1 }, X { "-4*3", -12, 1 }, X { 0, 0 } X}; X XAutoTests() X{ X int i; X struct TestVec *tv; X X for( tv=TV; tv->str; tv++ ) { X int val; X if( Igetnumber(tv->str,&val) == -1 ) { X if( tv->valid ) { X (void)printf("Error: <%s> invalid, should be %d\n", X tv->str, tv->value ); X if( StopOnError ) { X exit(1); X } X } X else { X if( verbose ) (void)printf("Passed: <%s> invalid\n",tv->str); X } X } X else { X if( tv->valid ) { X if( val != tv->value ) { X (void)printf("Error: <%s> %d, should be %d\n", X tv->str, val, tv->value ); X if( StopOnError ) { X exit(1); X } X } X else { X if( verbose ) (void)printf("Passed: <%s> %d\n", tv->str, val); X } X } X else { X (void)printf("Error: <%s> %d, should be invalid\n", X tv->str, val ); X if( StopOnError ) { X exit(1); X } X } X } X } X X X for(i=0;i<100;i++) X TestAllPatterns(i); X for(i=132;i<1000000000;i+=12331) X TestAllPatterns(i); X X} X XTestAllPatterns(i) X{ X TestFormat(i,"%d"); X TestFormat(i,"0%o"); X TestFormat(i,"0x%x"); X TestFormat(i,"0d%d"); X} XTestFormat(i,fstr) X int i; X char *fstr; X{ X char buf[128]; X X /* With 
no sign */ X (void)sprintf(buf+1,fstr,i); X TestBufferValue(buf+1,i); X /* With sign */ X buf[0]='+'; X TestBufferValue(buf,i); X buf[0]='-'; X TestBufferValue(buf,-i); X} XTestBufferValue(buf,i) X char *buf; X int i; X{ X int val; X if( Igetnumber(buf,&val) == -1 ) { X (void)printf("error - <%s> invalid, should be %d\n",buf,i); X if( StopOnError ) { X exit(1); X } X } X else if( val != i ) { X (void)printf("error - <%s> %d, should be %d\n",buf,val,i); X if( StopOnError ) { X exit(1); X } X } else { X if( verbose ) (void)printf("Passed: <%s> %d\n",buf,i); X } X} X X X END_OF_FILE if test 4677 -ne `wc -c <'test.c'`; then echo shar: \"'test.c'\" unpacked with wrong size! fi # end of 'test.c' fi echo shar: End of shell archive. exit 0 -- Andy Glew, aglew@uiuc.edu