Xref: utzoo news.software.b:2681 comp.sources.d:3919
Path: utzoo!utgpu!jarvis.csri.toronto.edu!mailrus!ames!vsi1!lmb
From: lmb@vicom.com (Larry Blair)
Newsgroups: news.software.b,comp.sources.d
Subject: Improved statistics script for C News w/logging patch
Message-ID: <1989Jul31.053214.3554@vicom.com>
Date: 31 Jul 89 05:32:14 GMT
Organization: VICOM Systems Inc., San Jose, CA
Lines: 422

Having run the awk script that I posted last month for a while now, I've
come across a couple problems that needed correcting.  C News accepts
articles that have whitespace in their Message-ID's.  That screws up the
field count for awk.  I also forgot to account for "moderated" in newgroup
messages.

Since this script is useless without the logging patch, it is included here
first.

*** relay/history.c.org	Sat Jun 17 23:14:20 1989
--- relay/history.c	Mon Jun 26 14:39:33 1989
***************
*** 184,191 ****
  
  	if (startlog) {
  		timestamp(stdout, &now);
! 		if (printf(" %s + %s", sendersite(nullify(art->h.h_path)),
! 		    msgid) == EOF)
  			fulldisk(art, "stdout");
  	} else
  		now = time(&now);
--- 184,199 ----
  
  	if (startlog) {
  		timestamp(stdout, &now);
! 		if(art->h.h_ngs == NULL) {
! 			if (printf(" %s f %s",
! 			    sendersite(nullify(art->h.h_path)), msgid) == EOF)
! 				fulldisk(art, "stdout");
! 		} else if (printf(" %s %c %s %s",
! 		    sendersite(nullify(art->h.h_path)),
! 		    (art->h.h_ctlcmd) ? 'c' : '+', msgid,
! 		    art->h.h_ngs) == EOF)
! 			fulldisk(art, "stdout");
! 		if ( art->h.h_ctlcmd && printf(" %s", art->h.h_ctlcmd) == EOF)
  			fulldisk(art, "stdout");
  	} else
  		now = time(&now);
--------------------------------------------------------------------
# USAGE: awk -f report_awk /usr/lib/news/log
# AWK script which eats netnews log files and produces a summary of USENET
# traffic over the period of time that the log was collected.
#
# C news version - for use with log file patches
#
# 6/30/89
#
# Log record layout as this script reads it (fields are blank separated):
#	$1-$3	time stamp
#	$4	system the article came from
#	$5	record type: + accepted, - rejected, j junked,
#		c control message, f cancel that arrived before its article
#	$6	Message-ID
#	$7	newsgroups
#	$8...	control command and its arguments ("c" records only),
#		followed by the systems the article was sent to
#
# Erik E. Fair
#	Original Author, May 22, 1984
#
# Brad Eacker
#	Modified to simplify the record processing and to sort the output.
#
# Erik E. Fair
#	Modified to provide information about control messages.
#
# Erik E. Fair
#	Bug in system name extraction fixed. It was assumed that the fourth field
#	(system name) always had a dot. local is one that doesn't. Some others
#	(including 2.9 sites) don't either.
#
# Earl Wallace
#	The "sent" field was changed from $5 to $6 in 2.10.2 (beta)
#	named "newstats" and called with no arguments.
#
# Erik E. Fair
#	Remove support for 2.10.1, revise for 2.10.2 to provide information
#	about junked articles, garbled articles, and bad newsgroups
#
# Erik E. Fair
#	Minor bug fix to bad newsgroup reporting, also now counting ``old''
#	articles as junked, with counter for number that are `old'.
#
# Erik E. Fair
#	Fix up the domain & local hosts support
#
# Erik E. Fair
#	Fix up the counting of gatewayed material, add counting of "linecount"
#	problems. Additional cleanup to make things faster.
#
# Larry Blair
#	Rewritten for C news with modified logging.  Removed many of the B news
#	counts, such as linecount mismatch.
#
BEGIN{
#	"ourname" is the C news name of our system.  The old lprefix stuff
#	doesn't apply for C news, since a common naming scheme is provided.
	ourname = "vsi1";
#
#	For phony name, create real entries.  They divide into two classes.
#	Most are additive.  Some are subtractive, meaning that when the phony
#	group appears, you need to subtract for a site that was added to
#	in a previous alias.
#
#	This stuff is used if you are running a group batching scheme with
#	a phony site name.  We also use it to map stuff sent to "news",
#	which is ames' netnews system.
#
#	Example:
#	alias_add[leaf_main]="sitea,siteb,sitec"
#	alias_sub[leaf_rest]="sitec"
#
#	leaf_main would be attributed to sitea, siteb, and sitec
#	leaf_main, leaf_rest would be attributed to sitea and siteb
#
	alias_add["leaf_main"]="daver,teraida,zorch,frame,ubvax,octela,altos86"
	alias_sub["leaf_rest"]="zorch"
	alias_add["news"]="ames"
#	If you do bi-directional USENET gatewaying (e.g. mailing list
#	to newsgroup where the material flows both ways freely), this
#	should be the name in the sys file that you use to mail stuff
#	to the mailing lists.
#
#	NOTE: I have not tested this stuff with C news. {lmb}
#
	pseudo = "internet";
	rptname = "(GATEWAY)";
#
#	Top level domain names and what network they represent
#	(for use in counting stuff that is gatewayed)
#
	domains["ARPA"] = rptname;	domains["arpa"] = rptname;
	domains["EDU"] = rptname;	domains["edu"] = rptname;
	domains["GOV"] = rptname;	domains["gov"] = rptname;
	domains["COM"] = rptname;	domains["com"] = rptname;
	domains["MIL"] = rptname;	domains["mil"] = rptname;
	domains["ORG"] = rptname;	domains["org"] = rptname;
	domains["NET"] = rptname;	domains["net"] = rptname;
	domains["UK"] = rptname;	domains["uk"] = rptname;
	domains["DEC"] = rptname;	domains["dec"] = rptname;
	domains["CSNET"] = rptname;	domains["csnet"] = rptname;
	domains["BITNET"] = rptname;	domains["bitnet"] = rptname;
	domains["MAILNET"] = rptname;	domains["mailnet"] = rptname;
	domains["UUCP"] = rptname;	domains["uucp"] = rptname;
	domains["OZ"] = rptname;	domains["oz"] = rptname;
	domains["AU"] = rptname;	domains["au"] = rptname;
#
#	tilde chosen because it is ASCII 126 (don't change this)
#	It is the sentinel for the selection sort in END: every real
#	system name has to compare lower than it.
#
	invalid = "~~~~~~";
#
	accept[invalid] = 0;
	reject[invalid] = 0;
	xmited[invalid] = 0;
	control[invalid] = 0;
	junked[invalid] = 0;
	tossed[invalid] = 0;
	neighbor[invalid] = 0;
	canfail = 0;
}
#
#	Runs for every record, before the per-type rules below: works out
#	"sys", the name the record is charged to.
#
{
#	Henry says that whitespace in Message-ID's is ok.  Awk doesn't
#	like that, so we just won't count those ones.
	if(substr($6, length($6), 1) != ">")
		next;
#
#	Get the name of the system that did this,
#	taking into account that not everyone believes in domains.
#	[[This stuff is extraneous for C news ]]
#
#	if we get a route addr (we shouldn't, but...), take the last one
#	[[Particularly with C news - lmb]]
#
	nhosts = split($4, hosts, "@");
	hostname = hosts[nhosts];
#
#	get the root domain name, and the hostname
#
	ndoms = split(hostname, doms, ".");
	domain = doms[ndoms];
	sys = doms[1];
#
#	check for local system, and if not that, then internet sites.
#	special case the network name replacement of specific host names,
#	such that the network name is there only on a `local' posting
#	(which is really gatewaying in disguise)
#
	if(sys == ourname) {
		sys = "local";
	} else {
		dom = domains[domain];
		if (dom) sys = dom;
	}
}
#
#	Accepted articles.  Count the newsgroups and who we sent it to.
#
$5 == "+" {
	accept[sys]++;
	neighbor[sys] = 1;
#	Tally by top-level hierarchy: everything before the first dot.
	nng = split($7, ngl, ",");
	for(i = 1; i <= nng; i++) {
		dot = index(ngl[i], ".");
		if (dot) ng = substr(ngl[i], 1, (dot - 1));
		else ng = ngl[i];
		if (ng) newsgcnt[ng]++;
	}
#	Everything after the newsgroups is a system we sent it to.
	for(j = 8; j <= NF; j++) {
		if ($(j) == pseudo) $(j) = rptname;
		else neighbor[$(j)] = 1;
		xmited[$(j)]++;
	}
	next;
}
#
#	Rejected article.  At this point, we just count them.  The "tossed"
#	count is for groups that were "x'ed" in the active file, but it's
#	not currently being printed in the report.  This section should
#	be expanded.
#
$5 == "-" {
	reject[sys]++;
	if($7 == "all") tossed[sys]++;
	next;
}
#	These are the cancels that precede the article being cancelled.
#	Erik used to call these "failed", so I left it alone.  Note that
#	the cancel has already been counted on the "c" line.
#
$5 == "f" {
	canfail++;
	next
}
#
#	Count the junk.
#
$5 == "j" {
	junked[sys]++;
	next
}
#
#	Control messages.  This is not fully tested; there may be some
#	others that use more than one field.
#
$5 == "c" {
	ctot++;
	accept[sys]++;
	control[sys]++;
	ctlcnt[$(8)]++;
#	j is the first field holding a system we sent the message to;
#	it has to skip the control command ($8) and its arguments.
	j = 9;
	if($8 == "cancel" || $8 == "rmgroup") j = 10;
	else if($8 == "newgroup") {
		if ($10 == "moderated") j = 11;
		else j = 10;
	}
	for( ; j <= NF; j++) {
		if ($(j) == pseudo) $(j) = rptname;
		else neighbor[$(j)] = 1;
		xmited[$(j)]++;
	}
	next;
}
#
#	Summarize and print the report
#
END{
#	special processing for Duplicates, because we can't tell if
#	they came from a netnews neighbor or from the gatewaying
#	activities until we have processed the entire log.
#
	for( hostname in reject ) {
#
#		get the root domain name, and the hostname
#
		ndoms = split(hostname, doms, ".");
		domain = doms[ndoms];
		sys = doms[1];
		if (! neighbor[sys]) {
			if (sys == ourname) {
				sys = "local";
			} else {
				dom = domains[domain];
				if (dom) sys = dom;
			}
		}
		i = reject[hostname];
		reject[hostname] = 0;
		reject[sys] += i;
	}
#
#	"list" collects every system name that gets a line in the report.
#
	rtot = 0;
	for( i in reject ) {
		if (reject[i] > 0) {
			list[i] = 1;
			rtot += reject[i];
		}
	}
	atot = 0;
	for( i in accept ) {
		list[i] = 1;
		atot += accept[i];
	}
#
#	Expand the phony (alias) names into the real sites.  We walk the
#	alias tables rather than xmited itself, because the expansion
#	inserts new keys into xmited and awk does not define what happens
#	when an array grows while it is being scanned.  Counts are added
#	(+=) and subtracted (-=) rather than assigned, so the result does
#	not depend on the order in which awk hands us the aliases, and a
#	site that is fed both directly and through an alias keeps both
#	counts.
#
	xtot = 0;
	for( i in alias_add ) {
		if (alias_add[i] == "" || xmited[i] == 0) continue;
		nala = split(alias_add[i], ala, ",");
		for (j = 1; j <= nala; j++) {
			list[ala[j]] = 1;
			xmited[ala[j]] += xmited[i];
		}
		xmited[i] = 0;
	}
	for( i in alias_sub ) {
		if (alias_sub[i] == "" || xmited[i] == 0) continue;
		nals = split(alias_sub[i], als, ",");
		for (j = 1; j <= nals; j++) {
			xmited[als[j]] -= xmited[i];
		}
		xmited[i] = 0;
	}
	for( i in xmited ) {
		if(xmited[i] != 0) list[i] = 1;
		xtot += xmited[i];
	}
	ctot = 0;
	for( i in control ) {
		list[i] = 1;
		ctot += control[i];
	}
	jtot = 0;
	for( i in junked ) {
		list[i] = 1;
		jtot += junked[i];
	}
#
#	ctot is part of atot (the "c" rule bumps accept as well as control),
#	so we don't add it in to the grand total.
#
	totarticles = atot + rtot;
	if (totarticles == 0) totarticles = 1;
#
#	"System" is padded to 14 columns to line up with the %-14s rows.
#
	printf("\nSystem        \tAccept\tReject\tJunked\tXmit to\tControl\t%% total\t%% rejct\n");
	for( ; ; ) {
#		selection sort: pick the lowest name not yet printed;
#		"invalid" sorts after every real name, so finding nothing
#		lower than it means we are done.
		i = invalid;
		for( j in list ) {
			if ( list[j] > 0 && j < i ) i = j;
		}
		if ( i == invalid ) break;
		list[i] = 0;
#
#		control is counted under accept.  The "j" rule does not
#		bump accept, so junked articles are not part of sitetot.
#
		sitetot = accept[i] + reject[i];
		if (sitetot == 0) sitetot = 1;
		articles[i] = sitetot;
#
#		What an 'orrible printf spec
#
		printf("%-14s\t%6d\t%6d\t%6d\t%7d\t%7d\t%6d%%\t%6d%%\n", i, accept[i], reject[i], junked[i], xmited[i], control[i], (sitetot * 100) / totarticles, (reject[i] * 100) / sitetot);
#
	}
	printf("\nTOTALS        \t%6d\t%6d\t%6d\t%7d\t%7d\t%6d%%\t%6d%%\n", atot, rtot, jtot, xtot, ctot, 100, (rtot * 100) / totarticles);
	printf("\nTotal Articles processed %d", totarticles);
	printf("\n");
	if (ctot) {
		printf("\nControl Invocations\n");
		for( i in ctlcnt ) {
			if (i == "cancel") {
				printf("%-12s %6d", i, ctlcnt[i]);
				if (canfail) printf(", %d failed", canfail);
				printf("\n");
			} else {
				printf("%-12s %6d\n", i, ctlcnt[i]);
			}
		}
	}
	if (atot) {
		printf("\nNetnews Categories Received\n");
#		size the name column to the longest category name
		l = 0;
		for( i in newsgcnt ) {
			if (l < length(i)) l = length(i);
		}
		fmt = sprintf("%%-%ds %%6d\n", l);
		for( ; ; ) {
#			selection sort: largest remaining count first
			max = 0;
			for( j in newsgcnt ) {
				if (newsgcnt[j] > max) {
					i = j;
					max = newsgcnt[j];
				}
			}
			if (max == 0) break;
			printf(fmt, i, newsgcnt[i]);
			newsgcnt[i] = 0;
		}
	}
}
-- 
Larry Blair   ames!vsi1!lmb   lmb@vicom.com