Xref: utzoo comp.sources.d:4430 news.admin:7864
Path: utzoo!utgpu!jarvis.csri.toronto.edu!mailrus!purdue!decwrl!reid
From: reid@decwrl.dec.com (Brian Reid)
Newsgroups: comp.sources.d,news.admin
Subject: "inpaths.c" (last updated Oct 17 1989 14:20)
Message-ID: <336@apostrophe.dec.com>
Date: 1 Dec 89 18:08:31 GMT
Sender: reid@decwrl.dec.com
Organization: DEC Western Research Laboratory
Lines: 387
Approved: reid@decwrl.dec.com

The flow data collected at decwrl shows the path that each article takes to
reach its destination. This data is combined at decwrl once a month and used
to produce these various reports. Here is the program that produces the data.
Every site, no matter how small, is invited and encouraged to run this program
and mail in the results.

/* inpaths.c -- track the paths of incoming news articles and prepare a report
 *		in a format suitable for decwrl pathsurveys
 *
 *
 * This program inputs a list of filenames of news articles, and outputs a
 * data report which should be mailed to the decwrl Network Monitoring
 * Project at address "pathsurvey@decwrl.dec.com". Please run it once a month
 * if you can, in time so that the results arrive at decwrl by the 1st
 * day of the month.
 *
 *
 * Run it like this:
 *
 *  cd /usr/spool/news
 *  find . -type f -print | inpaths "yourhost" | mail pathsurvey@decwrl.dec.com
 *
 *  where "yourhost" is the host name of your computer, e.g. "decwrl".
 *
 * The input to "inpaths" must be a list of the file names of news articles,
 * relative to the spooling directory. "./news/config/2771" and
 * "news/config/2771" are both legal inputs, but "/usr/spool/news/config/2771"
 * is not.
 *  If you have some other way of generating a list of news file
 * names, such as running a script over the history file, you can use that
 * instead. Inpaths handles crossposting regardless of which technique
 * you use.
 *
 * If you get an error message "no traffic found.
Check $CWD", then the * problem is most likely that the path names you are giving it are not * relative to the spooling directory, e.g. you are feeding it lines like * "/usr/spool/news/news/config/2771" instead of "./news/config/2771" * * There are 3 options: -s, -m, and -l for short, medium, and long report. * The default is to produce a long report. If you are worried about mail * expenses you can send a shorter report. The long report is typically * about 50K bytes for a major site, and perhaps 25K bytes for a smaller * site. * * Brian Reid * V1 Sep 1986 * V2.4 May 1989 * * Special thanks to Mel Pleasant and Bob Thrush for significant help with * portability bugs. * */ /* if you are compiling on a USG machine (SysV, etc), please uncomment the following line: */ /* #define SYSV */ #define VERSION "2.4" #include #include #include #include #include #define HEADBYTES 1024 #ifdef SYSV long time(); #else SYSV time_t time(); #endif SYSV extern void exit(); extern char *malloc(); extern char *strcpy(); /* this is index() or strchr() included here for portability */ char *index(ptr,chr) char *ptr,chr; { do {if (*ptr==chr) return(ptr);} while (*ptr++); return ( (char *) NULL); } main (argc,argv) int argc; char **argv; { char linebuf[1024], jc, *lptr, *cp, *cp1, *cp2; char rightdelim; char *pathfield, *groupsfield; int crossposted; char artbuf[HEADBYTES], ngfilename[256]; struct stat statbuf, *sbptr; char *scanlimit; char *hostname; char hostString[128]; int needHost; static int passChar[256]; int isopen,columns,verbose,totalTraffic; long nowtime,age,agesum; float avgAge; /* definitions for getopt */ extern int optind; extern char *optarg; /* structure used to tally the traffic between two hosts */ typedef struct trec { struct trec *rlink; struct nrec *linkid; int tally; } ; /* structure to hold the information about a host */ typedef struct nrec { struct nrec *link; struct trec *rlink; char *id; long sentto; /* tally of articles sent to somebody from here */ } ; struct 
nrec *hosthash[128], *hnptr, *list, *relay; struct trec *rlist; int i, article, gotbytes, c; extern errno; hostname = "unknown"; verbose = 2; while (( c=getopt(argc, argv, "sml" )) != EOF) switch (c) { case 's': verbose=0; break; case 'm': verbose=1; break; case 'l': verbose=2; break; case '?': fprintf(stderr, "usage: %s [-s] [-m] [-l] hostname\n",argv[0]); exit(1); } if (optind < argc) { hostname = argv[optind]; } else { fprintf(stderr,"usage: %s [-s] [-m] [-l] `hostname`\n",argv[0]); exit(1); } fprintf(stderr,"computing %s inpaths for host %s\n", verbose==0 ? "short" : (verbose==1 ? "medium" : "long"),hostname); for (i = 0; i<128; i++) hosthash[i] = (struct nrec *) NULL; /* precompute character types to speed up scan */ for (i = 0; i<=255; i++) { passChar[i] = 0; if (isalpha(i) || isdigit(i)) passChar[i] = 1; if (i == '-' || i == '.' || i == '_') passChar[i] = 1; } totalTraffic = 0; nowtime = (long) time(0L); agesum = 0; while (gets(linebuf) != (char *) NULL) { lptr = linebuf; isopen = 0; /* Skip blank lines */ if (linebuf[0] == '\0') goto bypass; /* Skip files that do not have pure numeric names */ i = strlen(lptr)-1; do { if (!isdigit(linebuf[i])) { if (linebuf[i]=='/') break; goto bypass; } i--; } while (i>=0); /* Open the file for reading */ article = open(lptr, O_RDONLY); isopen = (article > 0); if (!isopen) goto bypass; sbptr = &statbuf; if (fstat(article, sbptr) == 0) { /* Record age of file in hours */ age = (nowtime - statbuf.st_mtime) / 3600; agesum += age; /* Reject names that are not ordinary files */ if ((statbuf.st_mode & S_IFREG) == 0) goto bypass; /* Pick the file name apart into an equivalent newsgroup name */ if (*lptr == '.') { lptr++; if (*lptr == '/') lptr++; } cp = ngfilename; while (*lptr != 0) { if (*lptr == '/') *cp++ = '.'; else *cp++ = *lptr; lptr++; } cp--; while (isdigit(*cp)) *cp-- = NULL; if (*cp == '.') *cp = NULL; } else goto bypass; /* Read in the first few bytes of the article; find the end of the header */ gotbytes = 
read(article, artbuf, HEADBYTES); if (gotbytes < 10) goto bypass; /* Find "Path:" header field */ pathfield = (char *) 0; groupsfield = (char *) 0; scanlimit = &artbuf[gotbytes]; for (cp=artbuf; cp <= scanlimit; cp++) { if (*cp == '\n') break; if (pathfield && groupsfield) goto gotpath; if (strncmp(cp, "Path: ", 6) == 0) { pathfield = cp; goto nextgr; } if (strncmp(cp, "Newsgroups: ", 12) == 0) { groupsfield = cp; goto nextgr; } nextgr: while (*cp != '\n' && cp <= scanlimit) cp++; } if (groupsfield == (char *) 0 || (pathfield == (char *) 0)) goto bypass; gotpath: ; /* Determine the name of the newsgroup to which this is charged. It is not necessarily the name of the file in which we found it; rather, use the "Newsgroups:" field. */ crossposted = 0; groupsfield += 12; /* skip 'Newsgroups: ' */ while (*groupsfield == ' ') groupsfield++; cp= (char *) index(groupsfield,'\n'); *cp = 0; cp=(char *) index(groupsfield,','); if (cp) { crossposted++; *cp = 0; } /* To avoid double-billing, only charge the newsgroup if the pathname matches the contents of the Newsgroups: field. This will also prevent picking up junk and control messages. */ if (strcmp(ngfilename,groupsfield)) goto bypass; /* Extract all of the host names from the "Path:" field and put them in our host table. */ cp = pathfield; while (*cp != NULL && *cp != '\n') cp++; if (cp == NULL) { fprintf(stderr,"%s: end of Path line not in buffer.\n",lptr); goto bypass; } totalTraffic++; *cp = 0; pathfield += 5; /* skip 'Path:' */ cp1 = pathfield; relay = (struct nrec *) NULL; rightdelim = '!'; while (cp1 < cp) { /* get next field */ while (*cp1=='!') cp1++; cp2 = ++cp1; while (passChar[(int) (*cp2)]) cp2++; rightdelim = *cp2; *cp2 = 0; if (rightdelim=='!' && *cp1 != (char) NULL) { /* see if already in the table */ list = hosthash[*cp1]; while (list != NULL) { /* * Attempt to speed things up here a bit. 
Since we hash * on the first char, we see if the second char is a match * before calling strcmp() */ if (list->id[1] == cp1[1] && !strcmp(list->id, cp1)) { hnptr = list; break; /* I hate unnecessary goto's */ } list = list->link; } if(list == NULL) { /* get storage and splice in a new one */ hnptr = (struct nrec *) malloc(sizeof (struct nrec)); hnptr->id = (char *) strcpy(malloc(1+strlen(cp1)),cp1); hnptr->link = hosthash[*cp1]; hnptr->rlink = (struct trec *) NULL; hnptr->sentto = (long) 0; hosthash[*cp1] = hnptr; } } /* At this point "hnptr" points to the host record of the current host. If there was a relay host, then "relay" points to its host record (the relay host is just the previous host on the Path: line. Since this Path means that news has flowed from host "hnptr" to host "relay", we want to tally one message in a data structure corresponding to that link. We will increment the tally record that is attached to the source host "hnptr". */ if (relay != NULL && relay != hnptr) { rlist = relay->rlink; while (rlist != NULL) { if (rlist->linkid == hnptr) goto have2; rlist = rlist->rlink; } rlist = (struct trec *) malloc(sizeof (struct trec)); rlist->rlink = relay->rlink; relay->rlink = rlist; rlist->linkid = hnptr; rlist->tally = 0; have2: rlist->tally++; hnptr->sentto++; } cp1 = cp2; relay = hnptr; if (rightdelim == ' ' || rightdelim == '(') break; } bypass: if (isopen) close(article) ; } /* Now dump the host table */ if (!totalTraffic) { fprintf(stderr,"%s: error--no traffic found. 
Check $CWD.\n",argv[0]); exit(1); } avgAge = ((double) agesum) / (24.0*(double) totalTraffic); printf("ZCZC begin inhosts %s %s %d %d %3.1f\n", VERSION,hostname,verbose,totalTraffic,avgAge); for (jc=0; jc<127; jc++) { list = hosthash[jc]; while (list != NULL) { if (list->rlink != NULL) { if (verbose > 0 || (100*list->sentto > totalTraffic)) printf("%ld\t%s\n",list->sentto, list->id); } list = list->link; } } printf("ZCZC end inhosts %s\n",hostname); printf("ZCZC begin inpaths %s %s %d %d %3.1f\n", VERSION,hostname,verbose,totalTraffic,avgAge); for (jc=0; jc<127; jc++) { list = hosthash[jc]; while (list != NULL) { if (verbose > 1 || (100*list->sentto > totalTraffic)) { if (list->rlink != NULL) { columns = 3+strlen(list->id); sprintf(hostString,"%s H ",list->id); needHost = 1; rlist = list->rlink; while (rlist != NULL) { if ( (100*rlist->tally > totalTraffic) || ((verbose > 1)&&(5000*rlist->tally>totalTraffic)) ) { if (needHost) printf("%s",hostString); needHost = 0; relay = rlist->linkid; if (columns > 70) { printf("\n%s",hostString); columns = 3+strlen(list->id); } printf("%d Z %s U ", rlist->tally, relay->id); columns += 9+strlen(relay->id); } rlist = rlist->rlink; } if (!needHost) printf("\n"); } } list = list->link; } } printf("ZCZC end inpaths %s\n",hostname); fclose(stdout); exit(0); } Brought to you by Super Global Mega Corp .com