Path: utzoo!utgpu!watmath!watdragon!violet!ajmyrvold From: ajmyrvold@violet.waterloo.edu (Alan Myrvold) Newsgroups: comp.software-eng Subject: Re: C source lines in file Message-ID: <15957@watdragon.waterloo.edu> Date: 18 Aug 89 01:44:31 GMT References: <35120@ccicpg.UUCP> Sender: daemon@watdragon.waterloo.edu Reply-To: alanm@cognos.uucp (Alan Myrvold) Distribution: world Organization: Cognos Inc. Lines: 333 Keywords: In article <35120@ccicpg.UUCP> swonk@ccicpg.UUCP (Glen Swonk) writes: >-Does anyone have a program or a method of determing >-the number of C source lines in a source file? Ok. First off, sources don't really belong in comp.software-eng ... so I feel a bit guilty, but here's a reasonably portable C program to count : NCSL - non-commentary source lines LINES - source lines COMMENTS - C comments NCC - non-contiguous comments It will even run on systems where (heaven forbid) the argv[] list isn't as convenient to use as Unix's. And should compile with either a K&R or ANSI-style compiler. One known bug in the program is that VAX CC (and others) allow: #include "foo.c"" Which confuses the string parsing part of my program. Obfuscated C contest winners may also foul the program. Flames and comments to alanm@cognos.uucp, please. - Alan --- cut here --- /* LOC.C count C lines of code, comments */ /* For each c file, produces NCSL - non-commentary source lines LINES - source lines COMMENTS - C comments NCC - non-contiguous comments If invoked with no arguments and the file "cfiles.lis" does not exist, input is taken from stdin, output goes to stdout. If invoked with no arguments and "cfiles.lis" does exist, the filenames are assumed to be in "cfiles.lis", and the output is written to BOTH stdout and "cfiles.out". If invoked with arguments, the args are taken as filenames, and output is written to stdout. */ /* Alan Myrvold 3755 Riverside Dr. uunet!mitel!sce!cognos!alanm */ /* Cognos Incorporated P.O. 
Box 9707 alanm@cognos.uucp */ /* (613) 738-1440 x5530 Ottawa, Ontario */ /* CANADA K1G 3Z4 */ #include #include #include #define NORM 0 #define COMMENT 1 #define STRING 2 #define CHAR 3 #define ID 4 #define SPECIAL 5 #define WHITE 6 static long LINES_OF_CODE,LAST_LINE,CURRENT_LINE,COMMENTS,NCC,IS_CONTIG; #define id1(c) (isalpha(c) || ((c) == '_')) #define id2(c) (id1(c) || (('0' <= (c)) && ((c) <= '9')) || ((c) == '$')) #define is_white(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n')) void echo_fn(k,s) int k; char *s; { fputs(s,stdout); } void dump_white(s) char *s; { for (; *s; s++) { switch (*s) { case '\t' : printf("\\t"); break; case '\n' : printf("\\n"); break; case ' ' : printf("_"); break; default : putchar(*s); } } } void dump_fn(k,s) int k; char *s; { switch (k) { case ID : printf("ID %s\n",s); break; case COMMENT : printf("COMMENT %s\n",s); break; case SPECIAL : printf("SPECIAL %s\n",s); break; case STRING : printf("STRING %s\n",s); break; case CHAR : printf("CHAR %s\n",s); break; case WHITE : printf("WHITE "); dump_white(s); putchar('\n'); break; default : printf("unknown %s\n",s); } } void count_fn(k,s) int k; char *s; { switch (k) { case ID : case SPECIAL : case STRING : case CHAR : if (CURRENT_LINE != LAST_LINE) { LINES_OF_CODE++; LAST_LINE = CURRENT_LINE; } IS_CONTIG = 0; break; case COMMENT : COMMENTS++; if (!IS_CONTIG) { IS_CONTIG = 1; NCC++; } break; } } /* Beware trespassers of this code ... it is rather obtuse... 
*/ /* but it SEEMS to work */ void tokenize(f,t) FILE *f; void (*t)(); { int skip_next,in_id,in_white,bptr,mode,c,old_c,retain; static char buffer[8000]; IS_CONTIG = NCC = LINES_OF_CODE = COMMENTS = CURRENT_LINE = 0; LAST_LINE = -1; bptr = 0; mode = NORM; old_c = ' '; skip_next = in_id = in_white = 0; while (old_c != EOF) { c = getc(f); if (c == '\n') CURRENT_LINE++; retain = 0; /* Now, in NORM mode, we read one too many characters before deciding to start a new token */ if (mode == NORM) { /* already in id mode */ if (in_id) { if (id2(c)) { /* stay in mode */ retain = 1; buffer[bptr++] = c; } else { /* send off identifier */ buffer[bptr] = 0; t(ID,buffer); in_id = bptr = 0; } } /* already in white mode */ if (in_white) { if (is_white(c)) { /* stay in mode */ retain = 1; buffer[bptr++] = c; } else { /* send off white space */ buffer[bptr] = 0; t(WHITE,buffer); in_white = bptr = 0; } } /* Check if we are going to change modes now */ if (!in_white && is_white(c)) { /* start white mode */ retain = 1; buffer[bptr++] = c; in_white = 1; } if (!in_id && id1(c)) { /* start id mode */ retain = 1; in_id = 1; buffer[bptr++] = c; } /* start other modes */ switch (c) { case '/' : /* look ahead 1 character */ if (ungetc(getc(f),f) == '*') { retain = 1; mode = COMMENT; } break; case '\'' : retain = 1; mode = CHAR; break; case '\"' : retain = 1; mode = STRING; break; } } /* Now deal with the modes where we know when we are done */ switch (mode) { case COMMENT : retain = 1; buffer[bptr++] = c; if ((c == '/') && (old_c == '*')) { mode = NORM; buffer[bptr] = 0; t(COMMENT,buffer); bptr = 0; } break; case CHAR : retain = 1; buffer[bptr++] = c; if (skip_next) { skip_next = 0; } else { skip_next = (c == '\\'); if ((bptr > 1) && (c == '\'')) { mode = NORM; buffer[bptr] = 0; t(CHAR,buffer); bptr = 0; } } break; case STRING : retain = 1; buffer[bptr++] = c; if (skip_next) { skip_next = 0; } else { skip_next = (c == '\\'); if ((bptr > 1) && (c == '\"')) { mode = NORM; buffer[bptr] = 0; 
t(STRING,buffer); bptr = 0; } } break; } /* one-character token */ if (!retain) { buffer[0] = c; buffer[1] = 0; if (c != EOF) t(SPECIAL,buffer); } /* save previous character */ old_c = c; } } int count_main(argc,argv) int argc; char *argv[]; { int i,ier; FILE *fout,*mas,*f; char fbuf[80]; int exit(); ier = 0; if ((argc < 2) || ((argc == 2) && (strcmp(argv[1],"-") == 0))) { mas = fopen("cfiles.lis","rt"); if (mas) { fout = fopen("cfiles.out","wt"); if (!fout) exit(0); while (fscanf(mas,"%s",fbuf) == 1) { f = fopen(fbuf,"rt"); if (!f) { ier = 1; } else { tokenize(f,count_fn); printf("%s %ld %ld %ld %ld\n",fbuf, LINES_OF_CODE,CURRENT_LINE,COMMENTS,NCC); fprintf(fout,"%s %ld %ld %ld %ld\n",fbuf, LINES_OF_CODE,CURRENT_LINE,COMMENTS,NCC); fclose(f); } } fclose(mas); fclose(fout); } else { tokenize(stdin,count_fn); printf("%ld %ld %ld %ld\n",LINES_OF_CODE,CURRENT_LINE,COMMENTS,NCC); } } else { for (i = 1; i < argc; i++) { f = fopen(argv[i],"rt"); if (!f) { ier = 1; } else { tokenize(f,count_fn); printf("%s %ld %ld %ld %ld\n",argv[i], LINES_OF_CODE,CURRENT_LINE,COMMENTS,NCC); fclose(f); } } } return ier; } int main(argc,argv) int argc; char *argv[]; { #if VAX return !count_main(argc,argv); #else return count_main(argc,argv); #endif } --- cut here ---