Path: utzoo!utstat!helios.physics.utoronto.ca!jarvis.csri.toronto.edu!me!eastick
Newsgroups: news.software.b
From: eastick@me.utoronto.ca (Doug Eastick)
Subject: Re: Wanted: C news log file report generator
Message-ID: <1990Feb16.110903.28825@me.toronto.edu>
Copyright: none
Organisation: U of Toronto, Dept. of Mechanical Engineering
References: <25DBCBAC.5715@deimos.cis.ksu.edu>
Date: 16 Feb 90 16:09:03 GMT

tar@ksuvax1.cis.ksu.edu (Tim Ramsey) writes:
>I'm looking for an awk/perl script that will produce a report summary from
>the C news log file.  Does anyone have something they could send to me?

I assume there will be some ``me too'' requests, so here's the one I
grabbed a while ago.

-----

#!/usr/local/bin/perl
#
# C News Log Report Generator
#
# Mark Nagel <nagel@ics.uci.edu>
# $Id: rep_log.pl,v 1.6 89/11/29 11:00:51 news Exp $
#
# Large parts of this script were based on the B News log report
# awk script.
#

# Locations of the C News installation.  Either may be overridden from the
# environment; an unset (or empty) variable falls back to the default.
$NEWSBIN = $ENV{"NEWSBIN"} || "/usr/lib/newsbin";
$NEWSCTL = $ENV{"NEWSCTL"} || "/usr/lib/news";

# Program that recordgroups() runs to query message-ids.
$newshist = "$NEWSBIN/maint/newshist";

##############################################################################
# 				customization				     #
##############################################################################

#
# The MAXARGLEN variable controls how many message-ids will be queried
# for at one time via the newshist program.  Tune to your system (make
# as large as allowed).  The length here is the total length in
# characters of all the arguments.
#
$MAXARGLEN = 2048;

#
# The "local" array contains a list of regular expressions that
# identify a site entry in the log file as local.  Each regular
# expression will be matched case-independently and anchored at the
# beginning/end.
#
# The patterns are single-quoted so that backslashes reach the regular
# expression engine untouched; literal dots must be written as \. or
# they match any character.
#
@local = (
  'me',				# news server name
  '[^.]*\.ics\.uci\.edu'	# other local client names
);

#
# The "gateway" array contains a list of regular expressions that
# identify a site entry in the log file as a gateway.  Each regular
# expression will be matched case-independently and anchored at the
# beginning/end.
#
@gateway = (
  'local-.*',
  'gateway'
);

##############################################################################
# 			       initialization				     #
##############################################################################

# State accumulated by the log scan.
@msgids = ();			# accepted message-ids waiting for a newshist query
$arglen = 0;			# running length of the ids held in @msgids
$duplicates = 0;		# number of rejects logged as duplicates

# Leading command-line words that start with "-" are consumed as options:
# "--" ends option processing, anything beginning with "-s" turns on silent
# mode, and every other option word is dropped without comment.  What is
# left in @ARGV is read as log files by the scan loop.
$silent = 0;
for (;;) {
  $_ = $ARGV[0];
  last unless (/^-/);
  shift(@ARGV);
  last if (/^--$/);
  $silent = 1 if (/^-s/);
}

##############################################################################
# 				log file scan				     #
##############################################################################

#
# Read every log line from the files named on the command line (or from
# standard input) and tally it by receipt code.  Only constructs accepted
# by both old and current perl are used: subroutines are called as
# &name(...) because the "do name(...)" form is rejected by current perl.
#
while (<>) {
  next if /^$/;			# skip blank lines
  s/\n$//;			# strip the newline only; an unterminated
				# final line keeps its last character

  #
  # extract fields from line: the first six are fixed, the remainder
  # depends on the receipt code
  #
  ($month,$date,$time,$site,$code,$msgid,@logent) = split;

  #
  # fix up the site name as necessary
  #
  for $regexp (@gateway) {
    if ($site =~ /^$regexp$/i) {
      $site = "(GATEWAY)";
      last;
    }
  }
  for $regexp (@local) {
    if ($site =~ /^$regexp$/i) {
      $site = "local";
      last;
    }
  }
  # $site =~ s/\..*$//;

  #
  # check the receipt code
  #
  if ($code eq "-") {			# rejected article
    $reject{$site}++;
    if ($logent[0] eq "duplicate") {
      $duplicates++;
    } elsif ($logent[0] eq "no" && $logent[1] eq "subscribed") {
      #
      # "no subscribed groups in `...'"
      #
      $ng = $logent[4];
      $ng =~ s/`([^']*)'/$1/;
      for $i (split(/,/, $ng)) {
        $unsub{$i}++;
      }
    } elsif ($logent[0] eq "all" && $logent[3] eq "excluded") {
      #
      # "all groups `...' excluded in active"
      #
      $ng = $logent[2];
      $ng =~ s/`([^']*)'/$1/;
      for $i (split(/,/, $ng)) {
        $excluded{$i}++;
      }
    } else {
      #
      # print any others as-is for inspection
      #
      print "$_\n" unless ($silent);
    }
  } elsif ($code eq "+") {		# accepted article
    $accept{$site}++;

    #
    # Queue the message-id for a batched newshist query.  Each id is
    # charged one extra character for the separator it costs in the
    # argument list.  The pending batch is flushed before it would grow
    # past MAXARGLEN, but never while it is still empty, so a single
    # over-long id does not trigger a query with no ids at all.
    #
    $idlen = length($msgid) + 1;
    if ($#msgids >= 0 && $arglen + $idlen > $MAXARGLEN) {
      &recordgroups(@msgids);
      @msgids = ();
      $arglen = 0;
    }
    push(@msgids, $msgid);
    $arglen += $idlen;

    #
    # the remaining fields are the sites the article was passed on to
    #
    for $n (@logent) {
      $neighbor{$n} = 1;
      $xmited{$n}++;
    }
  } elsif ($code eq "j") {		# junked after accepted
    $junked{$site}++;
    if ($logent[0] eq "junked") {
      #
      # the group list is the fifth word of the message
      #
      $ng = $logent[4];
      $ng =~ s/`([^']*)'/$1/;
      for $i (split(/,/, $ng)) {
        $badng{$i}++;
      }
    }
  } elsif ($code eq "i") {		# ihave message
    $ihave++;
  } elsif ($code eq "s") {		# sendme message
    $sendme++;
  } else {				# illegal/unknown code
    print "$_\n" unless ($silent);
  }
}

# query whatever is left in the final, partially filled batch
&recordgroups(@msgids) if ($#msgids >= 0);

##############################################################################
# 			    statistics generation			     #
##############################################################################

#
# rejected messages
#
#
# Each loop below sums one per-site counter table into a grand total and
# records every site with a non-zero count in %list, which the report
# uses as its set of rows.  The tables are named with an explicit %
# sigil: a bare table name inside each() is rejected by current perl.
#

#
# rejected messages
#
$rtot = 0;
while (($key, $val) = each(%reject)) {
  if ($val > 0) {
    $list{$key} = 1;
    $rtot += $val;
  }
}

#
# accepted messages
#
$atot = 0;
while (($key, $val) = each(%accept)) {
  if ($val > 0) {
    $list{$key} = 1;
    $atot += $val;
  }
}

#
# transmitted messages
#
$xtot = 0;
while (($key, $val) = each(%xmited)) {
  if ($val > 0) {
    $list{$key} = 1;
    $xtot += $val;
  }
}

#
# junked messages
#
$jtot = 0;
while (($key, $val) = each(%junked)) {
  if ($val > 0) {
    $list{$key} = 1;
    $jtot += $val;
  }
}

##############################################################################
# 			      report generation				     #
##############################################################################

#
# Transmission Statistics
#
# Articles processed = accepted + rejected.  The divisors used for the
# percentage columns are kept separate from the counts themselves, so
# that guarding against division by zero never changes a reported number:
# an empty log reports 0 articles, and a site that was only transmitted
# to shows 0% of the total instead of a phantom article.
$totalarticles = $atot + $rtot;
$totaldiv = $totalarticles || 1;
print "\n" unless ($silent);
print "System      \tAccept\tReject\tJunked\tXmit to\t %total\t%reject\n";
for $i (sort(keys(%list))) {
  # articles this site sent us, whether accepted or rejected
  $sitetot = $accept{$i} + $reject{$i};
  $articles{$i} = $sitetot;
  $sitediv = $sitetot || 1;

  printf "%-14.14s\t%6d\t%6d\t%6d\t%7d\t%6d%%\t%6d%%\n",
	$i, $accept{$i}, $reject{$i}, $junked{$i}, $xmited{$i},
	($sitetot * 100) / $totaldiv, ($reject{$i} * 100) / $sitediv;
}
printf "\nTOTALS        \t%6d\t%6d\t%6d\t%7d\t%6d%%\t%6d%%\n",
	$atot, $rtot, $jtot, $xtot, 100, ($rtot * 100) / $totaldiv;
print "\nTotal Articles processed $totalarticles";
print " (1 duplicate)" if ($duplicates == 1);
print " ($duplicates duplicates)" if ($duplicates > 1);
print "\n";

#
# Netnews Categories
#
# List the top-level categories collected in %ngcount, most frequent
# first.  The table is sorted once (ties are broken by name so the output
# is reproducible) and is left intact.
if ($atot > 0) {
  print "\nNetnews Categories Received\n";

  # pad the name column to the longest category name
  $width = 0;
  for $i (keys(%ngcount)) {
    $width = length($i) if ($width < length($i));
  }
  $fmt = "%-${width}s %d\n";

  for $i (sort { $ngcount{$b} <=> $ngcount{$a} || $a cmp $b } keys(%ngcount)) {
    last if ($ngcount{$i} <= 0);	# nothing below this line was counted
    printf $fmt, $i, $ngcount{$i};
  }
}

#
# Bad Newsgroups
#
# Groups named in "junked" log entries, in alphabetical order with their
# counts.  The section is omitted entirely when there are none.  The table
# is named with its % sigil because a bare name inside keys() is rejected
# by current perl.
@keys = sort(keys(%badng));
if ($#keys >= 0) {
  print "\nBad Newsgroups Received\n";

  # pad the name column to the longest group name
  $width = 0;
  for $i (@keys) {
    $width = length($i) if ($width < length($i));
  }
  $fmt = "%-${width}s %d\n";

  for $i (@keys) {
    printf $fmt, $i, $badng{$i};
  }
}

#
# Unsubscribed Newsgroups
#
# Groups named in "no subscribed groups" rejections, in alphabetical
# order with their counts.  The section is omitted entirely when there
# are none.  The table is named with its % sigil because a bare name
# inside keys() is rejected by current perl.
@keys = sort(keys(%unsub));
if ($#keys >= 0) {
  print "\nUnsubscribed Newsgroups Received\n";

  # pad the name column to the longest group name
  $width = 0;
  for $i (@keys) {
    $width = length($i) if ($width < length($i));
  }
  $fmt = "%-${width}s %d\n";

  for $i (@keys) {
    printf $fmt, $i, $unsub{$i};
  }
}

#
# Excluded Newsgroups
#
# Groups named in "all groups ... excluded in active" rejections, in
# alphabetical order with their counts.  The section is omitted entirely
# when there are none.  The table is named with its % sigil because a
# bare name inside keys() is rejected by current perl.
@keys = sort(keys(%excluded));
if ($#keys >= 0) {
  print "\nExcluded Newsgroups Received\n";

  # pad the name column to the longest group name
  $width = 0;
  for $i (@keys) {
    $width = length($i) if ($width < length($i));
  }
  $fmt = "%-${width}s %d\n";

  for $i (@keys) {
    printf $fmt, $i, $excluded{$i};
  }
}

##############################################################################
# recordgroups(msgid, ...)
#
# Given a list of message-ids, retrieve the newsgroups associated with each
# message-id and update the global ngcount table appropriately.

sub recordgroups {
  #
  # Arguments: a list of message-ids, with or without angle brackets.
  #
  # Runs $newshist once with all the ids as arguments and reads its
  # output.  On every output line the first two fields are ignored and
  # each remaining word is taken as a group entry: anything from the
  # first "/" on is dropped, and if the name contains a dot its leading
  # component is counted in the global %ngcount.  Names without a dot
  # are not counted.
  #
  # The caller's $_ and @msgids are left untouched.  Failures are
  # reported on stderr and are never fatal to the report.
  #
  local(@ids) = @_;
  local($pid, $line, $id, $grp, $skip1, $skip2, @groups);

  return if ($#ids < 0);		# nothing to ask about

  # newshist is handed the ids without their angle brackets
  for $id (@ids) {
    $id =~ s/<([^>]*)>/$1/;
  }

  #
  # Forking open: the parent gets the child's pid and reads the child's
  # stdout through NH, the child gets 0, and a failed fork gives undef.
  # The failed fork must be told apart from the child, or the report
  # process itself would be replaced by newshist.
  #
  $pid = open(NH, "-|");
  if (!defined($pid)) {
    warn("recordgroups: cannot fork: $!\n");
    return;
  }
  if ($pid == 0) {
    # exec only returns on failure; the child must then stop here rather
    # than carry on and produce a second copy of the report
    exec($newshist, '--', @ids) || warn("exec($newshist): $!\n");
    exit(127);
  }

  while (defined($line = <NH>)) {
    # splitting on ' ' also discards the trailing newline
    ($skip1, $skip2, @groups) = split(' ', $line);
    for $grp (@groups) {
      $grp =~ s/\/.*$//;
      if ($grp =~ /\./) {
        $grp =~ s/\..*//;
        $ngcount{$grp}++;
      }
    }
  }

  # close() on a pipe also fails when the program exited non-zero; in
  # that case $! is 0 and the exit status is in $?
  if (!close(NH)) {
    if ($!) {
      warn("close($newshist): $!\n");
    } else {
      warn("$newshist exited with status " . ($? >> 8) . "\n");
    }
  }
}
--
Doug Eastick -- eastick@me.utoronto.ca