Path: utzoo!attcan!utgpu!jarvis.csri.toronto.edu!cs.utexas.edu!usc!orion.oac.uci.edu!uci-ics!nagel From: nagel@paris.ics.uci.edu (Mark Nagel) Newsgroups: alt.sources Subject: YACLRG (yet another C-news log report generator) Message-ID: <25742D41.28523@paris.ics.uci.edu> Date: 29 Nov 89 19:25:52 GMT Distribution: alt Organization: UC Irvine Department of ICS Lines: 387 Here is a report generator for straight C-news logs written in Perl. It has very few 3.0 dependencies and should work fine under 2.0.18. Specifically, I did not use dbmopen since many C-news sites might be using dbz instead. Like another report generator I saw, this one caches message-ids in a list to minimize the number of calls out to newshist. Seems to work quite fast considering the fact that C-news fails to provide useful information directly in the log file. Be sure to tune the program to your local system before trying this. There are three variables you will may to modify: $MAXARGLEN Set this to the largest number of characters that can appear in an argument list submitted to exec. The larger the better. @local Set this to a list of regular expressions that match sites that really should be local (like if you have sites posting via NNTP and you'd like them all to show up as local in the log file). @gateway Set this to a list of regular expressions that match sites that really should be gateway sites. For example, we have a bunch of mailing lists forwarded to local groups and the sending machine always shows up as 'local-', so I alias those names to be gateways. Usage rep_log [-s] logfile ... or rep_log [-s] < logfile The optional -s flag supresses the output of erroneous lines prior to the main statistics area -- this is useful for submitting log reports to regional statistics groups or whatever. The script can take all log file names on the command line or from standard input. The log report appears on standard output. Anyway, that's not much documentation, but hopefully it's enough to use the script. Please send any comments or suggestions my way -- I'd be happy to make this even better, faster, stronger... #! /bin/perl # # C News Log Report Generator # # Mark Nagel # $Id: rep_log.pl,v 1.6 89/11/29 11:00:51 news Exp $ # # Large parts of this script were based on the B News log report # awk script. # $NEWSBIN = $ENV{"NEWSBIN"} || "/usr/local/lib/newsbin"; $NEWSCTL = $ENV{"NEWSCTL"} || "/usr/local/lib/news"; $newshist = "$NEWSBIN/maint/newshist"; ############################################################################## # customization # ############################################################################## # # The MAXARGLEN variable controls how many message-ids will be queried # for at one time via the newshist program. Tune to your system (make # as large as allowed). The length here is the total length in # characters of all the arguments. # $MAXARGLEN = 2048; # # The "local" array contains a list of regular expressions that # identify a site entry in the log file as local. Each regular # expression will be matched case-independently and anchored at the # beginning/end. # @local = ( "uci-ics", # news server name "[^.]*.ics.uci.edu" # other local client names ); # # The "gateway" array contains a list of regular expressions that # identify a site entry in the log file as a gateway. Each regular # expression will be matched case-independently and anchored at the # beginning/end. # @gateway = ( "local-.*", "gateway" ); ############################################################################## # initialization # ############################################################################## $duplicates = 0; @msgids = (); $arglen = 0; $silent = 0; while ($_ = $ARGV[0], /^-/) { shift; last if (/^--$/); /^-s/ && ($silent = 1); } ############################################################################## # log file scan # ############################################################################## while (<>) { next if /^$/; # skip blank lines chop; # # extract fields from line # ($month,$date,$time,$site,$code,$msgid,@logent) = split; # # fix up the site name as necessary # for $regexp (@gateway) { if ($site =~ /^$regexp$/i) { $site = "(GATEWAY)"; last; } } for $regexp (@local) { if ($site =~ /^$regexp$/i) { $site = "local"; last; } } $site =~ s/\..*$//; # # check the receipt code # if ($code eq "-") { # rejected article $reject{$site}++; if ($logent[0] eq "duplicate") { $duplicates++; } elsif ($logent[0] eq "no" && $logent[1] eq "subscribed") { # # "no subscribed groups in `...'" # $ng = $logent[4]; $ng =~ s/`([^']*)'/$1/; @ng = split(/,/, $ng); for $i (@ng) { $unsub{$i}++; } } elsif ($logent[0] eq "all" && $logent[3] eq "excluded") { # # "all groups `...' excluded in active" # $ng = $logent[2]; $ng =~ s/`([^']*)'/$1/; @ng = split(/,/, $ng); for $i (@ng) { $excluded{$i}++; } } else { # # print any others as-is for inspection # print "$_\n" unless ($silent); } } elsif ($code eq "+") { # accepted article $accept{$site}++; if ($arglen + length($msgid) > $MAXARGLEN) { do recordgroups(@msgids); @msgids = ($msgid); $arglen = length($msgid); } else { push(@msgids, $msgid); $arglen += length($msgid); } for ($i = 0; $i <= $#logent; $i++) { $n = $logent[$i]; $neighbor{$n} = 1; $xmited{$n}++; } } elsif ($code eq "j") { # junked after accepted $junked{$site}++; if ($logent[0] eq "junked") { $ng = $logent[4]; $ng =~ s/`([^']*)'/$1/; @ng = split(/,/, $ng); for $i (@ng) { $badng{$i}++; } } } elsif ($code eq "i") { # ihave message $ihave++; } elsif ($code eq "s") { # sendme message $sendme++; } else { # illegal/unknown code print "$_\n" unless ($silent); } } do recordgroups(@msgids) if ($#msgids >= 0); ############################################################################## # statistics generation # ############################################################################## # # rejected messages # $rtot = 0; while (($key, $val) = each(reject)) { if ($val > 0) { $list{$key} = 1; $rtot += $val; } } # # accepted messages # $atot = 0; while (($key, $val) = each %accept) { if ($val > 0) { $list{$key} = 1; $atot += $val; } } # # transmitted messages # $xtot = 0; while (($key, $val) = each(xmited)) { if ($val > 0) { $list{$key} = 1; $xtot += $val; } } # # junked messages # $jtot = 0; while (($key, $val) = each(junked)) { if ($val > 0) { $list{$key} = 1; $jtot += $val; } } ############################################################################## # report generation # ############################################################################## # # Transmission Statistics # $totalarticles = $atot + $rtot; $totalarticles++ if ($totalarticles == 0); print "\n" unless ($silent); print "System \tAccept\tReject\tJunked\tXmit to\t %total\t%reject\n"; for $i (sort(keys(list))) { $sitetot = $accept{$i} + $reject{$i}; $sitetot++ if ($sitetot == 0); $articles{$i} = $sitetot; printf "%-14s\t%6d\t%6d\t%6d\t%7d\t%6d%%\t%6d%%\n", $i, $accept{$i}, $reject{$i}, $junked{$i}, $xmited{$i}, ($sitetot * 100) / $totalarticles, ($reject{$i} * 100) / $sitetot; } printf "\nTOTALS \t%6d\t%6d\t%6d\t%7d\t%6d%%\t%6d%%\n", $atot, $rtot, $jtot, $xtot, 100, ($rtot * 100) / $totalarticles; print "\nTotal Articles processed $totalarticles"; print " (1 duplicate)" if ($duplicates == 1); print " ($duplicates duplicates)" if ($duplicates > 1); print "\n"; # # Netnews Categories # if ($atot > 0) { print "\nNetnews Categories Received\n"; $l = 0; for $i (keys(ngcount)) { $l = length($i) if ($l < length($i)); } $fmt = "%-${l}s %d\n"; while (1) { $max = 0; for $j (keys(ngcount)) { if ($ngcount{$j} > $max) { $max = $ngcount{$j}; $i = $j; } } last if ($max == 0); printf $fmt, $i, $ngcount{$i}; $ngcount{$i} = 0; } } # # Bad Newsgroups # @keys = sort(keys(badng)); if ($#keys >= 0) { print "\nBad Newsgroups Received\n"; $l = 0; for $i (@keys) { $l = length($i) if ($l < length($i)); } $fmt = "%-${l}s %d\n"; for $i (@keys) { printf $fmt, $i, $badng{$i}; } } # # Unsubscribed Newsgroups # @keys = sort(keys(unsub)); if ($#keys >= 0) { print "\nUnsubscribed Newsgroups Received\n"; $l = 0; for $i (@keys) { $l = length($i) if ($l < length($i)); } $fmt = "%-${l}s %d\n"; for $i (@keys) { printf $fmt, $i, $unsub{$i}; } } # # Excluded Newsgroups # @keys = sort(keys(excluded)); if ($#keys >= 0) { print "\nExcluded Newsgroups Received\n"; $l = 0; for $i (@keys) { $l = length($i) if ($l < length($i)); } $fmt = "%-${l}s %d\n"; for $i (@keys) { printf $fmt, $i, $excluded{$i}; } } ############################################################################## # recordgroups(msgid) # # Given a list of message-ids, retrieve the newsgroups associated with each # message-id and update the global ngcount table appropriately. sub recordgroups { local(@msgids) = @_; local($i, @groups); for ($i = 0; $i <= $#msgids; $i++) { $msgids[$i] =~ s/<([^>]*)>/$1/; } open(NH, "-|") || exec $newshist, '--', @msgids; while () { chop; ($_, $_, @groups) = split; foreach $i (@groups) { $i =~ s/\/.*$//; if ($i =~ /\./) { $i =~ s/\..*//; $ngcount{$i}++; } } } close(NH) || warn("exec($newshist): $!\n"); } -- Mark Nagel UC Irvine Department of ICS +----------------------------------------+ ARPA: nagel@ics.uci.edu | Help! Somebody! I'm being oppressed! | UUCP: ucbvax!ucivax!nagel +----------------------------------------+ Brought to you by Super Global Mega Corp .com