Relay-Version: version B 2.10 5/3/83; site utzoo.UUCP Path: utzoo!watmath!clyde!bellcore!ulysses!mhuxr!mhuxt!houxm!ihnp4!inuxc!pur-ee!j.cc.purdue.edu!rsk From: rsk@j.cc.purdue.edu (Tyrannosaurus Wombat) Newsgroups: net.sources Subject: atype.c & ctype.c -- simple text statistics Message-ID: <2269@j.cc.purdue.edu> Date: Mon, 13-Oct-86 00:06:35 EDT Article-I.D.: j.2269 Posted: Mon Oct 13 00:06:35 1986 Date-Received: Tue, 14-Oct-86 04:59:32 EDT Organization: Purdue University Computing Center Lines: 190 Keywords: ctype, ascii, classification The following two short programs calculate simple text statistics, and occasionally come in handy; I'm sending these out in net.sources in the hopes of garnering useful comments on them. They tend to be useful in debugging from time to time. Atype prints a table of ASCII occurrences like this... 468 nul 4 soh 3 stx 1 etx 0 eot 0 enq 0 ack 3 bel 0 bs 0 ht 0 nl 0 vt 2 np 0 cr 0 so 0 si 5 dle 0 dc1 0 dc2 0 dc3 0 dc4 0 nak 0 syn 0 etb 0 can 0 em 0 sub 0 esc 0 fs 0 gs 0 rs 0 us 0 sp 0 ! 0 " 0 # 0 $ 0 % 0 & 0 ' 0 ( 0 ) 0 * 0 + 0 , 0 - 6 . 0 / 0 0 1 1 0 2 0 3 0 4 0 5 0 6 0 7 0 8 0 9 0 : 0 ; 0 < 0 = 0 > 0 ? 0 @ 0 A 0 B 0 C 0 D 0 E 0 F 0 G 1 H 0 I 0 J 0 K 0 L 0 M 0 N 0 O 0 P 0 Q 0 R 0 S 0 T 0 U 0 V 0 W 1 X 0 Y 0 Z 0 [ 0 \ 0 ] 0 ^ 0 _ 0 ` 2 a 0 b 3 c 0 d 3 e 0 f 0 g 0 h 0 i 0 j 0 k 0 l 0 m 0 n 0 o 3 p 0 q 0 r 0 s 3 t 0 u 0 v 0 w 0 x 3 y 0 z 0 { 0 | 0 } 0 ~ 0 del ...and reads either stdin or whatever file arguments are provided. Ctype prints a table of ctype(3) occurrences like this... ascii cntrl print space punct alnum digit alpha upper lower 510 487 25 18 7 17 1 0 17 3 ...and reads either stdin or whatever file arguments are provided. Both work on 4.2BSD. One shortcoming of each is known: very large input can cause the printed output fields to overflow, making the display messy. A future release (in mod.sources) will include appropriate manual pages, and whatever enhancements result from comments made by readers. 
--------------------------------------------------
#include <stdio.h>

/*
 * Atype.c	find numbers of different types of characters in
 *		a file...Rich Kulawiec, 8/2/82 revised 10/86
 *
 * Prints a table of 16 rows by 8 entries giving, for each of the 128
 * ASCII codes, how many times that code occurred in the input.  Input
 * is stdin when no arguments are given, otherwise each named file in
 * turn; the counts are accumulated across all files.
 *
 * Note that characters 200-377 octal are mapped down: the high bit is
 * masked off before a character is counted.
 *
 * Exit status is 0 on success and 1 if any named file could not be
 * opened (the remaining files are still processed).
 */

#define NROWS	16	/* rows in the printed table */
#define NCOLS	8	/* entries in each printed row */

/* Label printed beside each count; maptable[code / NCOLS][code % NCOLS]. */
char *maptable[NROWS][NCOLS] = {
	{ "nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel" },
	{ "bs ", "ht ", "nl ", "vt ", "np ", "cr ", "so ", "si " },
	{ "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb" },
	{ "can", "em ", "sub", "esc", "fs ", "gs ", "rs ", "us " },
	{ "sp ", " ! ", " \" "," # ", " $ ", " % ", " & ", " ' " },
	{ " ( ", " ) ", " * ", " + ", " , ", " - ", " . ", " / " },
	{ " 0 ", " 1 ", " 2 ", " 3 ", " 4 ", " 5 ", " 6 ", " 7 " },
	{ " 8 ", " 9 ", " : ", " ; ", " < ", " = ", " > ", " ? " },
	{ " @ ", " A ", " B ", " C ", " D ", " E ", " F ", " G " },
	{ " H ", " I ", " J ", " K ", " L ", " M ", " N ", " O " },
	{ " P ", " Q ", " R ", " S ", " T ", " U ", " V ", " W " },
	{ " X ", " Y ", " Z ", " [ ", " \\ ", " ] ", " ^ ", " _ " },
	{ " ` ", " a ", " b ", " c ", " d ", " e ", " f ", " g " },
	{ " h ", " i ", " j ", " k ", " l ", " m ", " n ", " o " },
	{ " p ", " q ", " r ", " s ", " t ", " u ", " v ", " w " },
	{ " x ", " y ", " z ", " { ", " | ", " } ", " ~ ", "del" }
};

/*
 * count[code % NCOLS][code / NCOLS] is the number of occurrences of
 * `code'.  The indices are transposed relative to maptable.  The
 * counters are long so that long inputs are less likely to overflow.
 */
long count[NCOLS][NROWS];

/* Read `in' to end of file, adding each character to the counts. */
static void
count_stream(FILE *in)
{
	int c;

	while ((c = getc(in)) != EOF) {
		int code = c & 0177;	/* fold 0200-0377 onto 0-0177 */

		count[code % NCOLS][code / NCOLS]++;
	}
}

int
main(int argc, char *argv[])
{
	int status = 0;
	int i, row, col;

	if (argc == 1) {
		count_stream(stdin);
	} else {
		for (i = 1; i < argc; i++) {
			FILE *fp = fopen(argv[i], "r");

			if (fp == NULL) {
				(void) fprintf(stderr, "atype: can't open %s\n", argv[i]);
				status = 1;	/* report it, but keep going */
				continue;
			}
			count_stream(fp);
			(void) fclose(fp);
		}
	}

	for (row = 0; row < NROWS; row++) {
		for (col = 0; col < NCOLS; col++) {
			(void) printf("%5ld %s", count[col][row], maptable[row][col]);
		}
		(void) printf("\n");
	}
	return status;
}
--------------------------------------------------
/*
 * NOTE(review): the header names on the two #include lines below were
 * lost in transit.  <stdio.h> is required by the FILE declarations that
 * follow; <ctype.h> is presumed from the ctype(3) classes this program
 * reports -- confirm against the original posting.
 */
#include <stdio.h>
#include <ctype.h>

/* Ctype.c	find numbers of different types of characters in
 *		a file...Rich Kulawiec, 4/20/81 revised 10/86
 */

FILE *fp;
FILE *fopen();
void tally();

#define NASCII 0
#define NCNTRL 1
#define NPRINT 2
#define NALNUM 3
#define NPUNCT 4 #define NALPHA 5 #define NDIGIT 6 #define NUPPER 7 #define NLOWER 8 #define NSPACE 9 #define NCLASS 10 long class[NCLASS]; main(argc, argv) int argc; char *argv[]; { int i,j; for( j = 0; j < NCLASS; j++) class[j] = 0L; (void) printf("ascii\tcntrl\tprint\tspace\tpunct\talnum\tdigit\talpha\tupper\tlower\n"); if( argc == 1) { fp = stdin; tally(fp); } else { for ( i = 1; i < argc; i++) { if( (fp=fopen(argv[i],"r")) == NULL) { (void) fprintf(stderr,"ctype: can't open %s\n",argv[i]); continue; } tally(fp); (void) fclose(fp); } } for ( j = 0; j