Xref: utzoo news.software.b:7813 comp.lang.perl:5373 Path: utzoo!utgpu!news-server.csri.toronto.edu!bonnie.concordia.ca!uunet!world!spike From: spike@world.std.com (Joe Ilacqua) Newsgroups: news.software.b,comp.lang.perl Subject: Arbitron in PERL Message-ID: <1991May19.215155.2205@world.std.com> Date: 19 May 91 21:51:55 GMT Organization: Software Tool & Die Lines: 176 This is a perl version of the arbitron program. I wrote parbitron because SUNOS 4.0.3's awk kept core dumping on long .newsrc lines (under SUNOS 4.1 in prints 'line too long' and aborts). I would not say this is faster, but it does work. I have shown this to Brian Reid, and felt that parbitron's output was exceptable. HOWEVER: I STRONGLY RECOMMEND that you run parbitron with "$summarypath" set to YOUR email address AND that you compare the output with that of the shell version before you set it loose. Bugs, complaints, and improvements to me. ->Spike (Spike@World.STD.com) #! /bin/sh # This is a shell archive. Remove anything before this line, then unpack # it by saving it into a file and typing "sh file". To overwrite existing # files, type "sh file -c". You can also feed this as standard input via # unshar, or by typing "sh parbitron <<'END_OF_parbitron' X#!/usr/bin/perl X# parbitron -- a perl version of the program produces rating sweeps for USENET. X X# To participate in the international monthly ratings sweeps, X# run "arbitron" every month. Brian Reid runs the statistics program on the X# first day of each month; it will include any report that has reached it by X# that time. To make sure your site's data is included, run the survey X# program no later than the 20th day of each month. X X# This version of arbitron was written by Spike (Joe Ilacqua), X# spike@world.std.com. It seemed like the right thing to do at the time. X X# Arbitron was originally written by Brian Reid, DEC Western Research Lab, X# reid@decwrl.dec.com] X X# Notes: The Perl version of arbitron intentionally does not support: X# NN's "~/.nn/rc" file, the current version of NN uses the ".newsrc". X# Old B News' 2 field active files. X# You should upgrade your software, or run the shell version of arbitron. X# X# As with the shell arbitron, the results of this program are dependent X# on the rate at which you expire news. If you are a small site that X# expires news rapidly, the results may indicate fewer active readers X# than you actually have. X X# Who to send the report to: X# uucp path: {sun, hplabs, pyramid, decvax, ucbvax}!decwrl!netsurvey X$summarypath = 'netsurvey@decwrl.dec.com'; X X# Range of /etc/passwd UID's that represent actual people (rather than X# maintenance accounts or daemons or whatever) X$lowUID = 5; X$highUID = 9999; X X# If you need to get the active file from another host define activehost. X# The user running parbitron must be able to rsh(1) to the remote host. X#$activehost = 'foo'; X X$active = '/usr/lib/news/active'; X$active = "rsh $activehost cat $active|" if ($activehost); X X$users = 0; # Users who could read news. X$newsreaders = 0; # Users who do read news. X Xchop($date = `date`); X($wday,$mon,$day,$hour,$tz,$year) = split(' ',$date); X$dat="$mon$year"; X X# One of these should return the hostname. Xchop($hostname = `hostname || uname -n || uuname -l`); X Xopen(ACTIVE,$active) || die "Can't open active file: $!\n"; X Xwhile() X{ X next unless /^[a-z][-0-9_a-z]*\./; # from shell arbitron X ($group,$maximum,$minimum) = split; X $groupcount{$group} = 0; X $groupmax{$group} = $maximum; X $groupmin{$group} = $minimum; X} Xclose(ACTIVE); X Xwhile (($user,$pass,$uid,$gid,$quota,$com,$gcos,$dir) = getpwent) { X next if ($uid < $lowUID) || ($uid > $highUID); X $users++; X X next if $homes{$dir}; # Don't do a .newsrc twice X $homes{$dir} = 1; X next if (! -r "$dir/.newsrc"); X open(NEWSRC,"$dir/.newsrc") || next; # This shouldn't fail X X $counted = 0; X X while() { X next if (!/: [0-9]/); X ($group,$arts) = split; X $group =~ s/://; X next unless defined($groupcount{$group}); # bogus group X next if $hits{$group}; # Don't count a group twice X $hits{$group} = 1; X X $maximum = $groupmax{$group}; X $minimum = $groupmin{$group}; X next if $minimum == $maximum; # No articles if $minimum == $maximum X X# We want the last article read from the line in the .newsrc, it is X# a comma septated number or range (i.e ...,415 or ...,3001-3078) X X @arts = split(',',$arts); # Split the line up on ","s X# Spilt the last element on "-" if need be X @arts = split('-',$arts[$#arts]) if ($arts[$#arts] =~ /-/); X if (($arts[$#arts] >= $groupmin{$group}) X && ($arts[$#arts] <= $groupmax{$group})) { X $groupcount{$group}++; X if (!$counted) { X $newsreaders++; # We have found another reader of news X $counted++; # only count them once! X } X } X } X undef %hits; X close(NEWSRC); X} X Xundef %groupmax; Xundef %groupmin; Xundef %homes; X X$i = 0; X Xwhile (($group,$count) = each %groupcount) { X $tosort[$i++] = "$count $group"; X} X Xundef %groupcount; X Xsub nr { # test like 'sort -nr' for sort function X ($anum,$astring) = split(' ',$a); X ($bnum,$bstring) = split(' ',$b); X if ($anum != $bnum) { -($anum <=> $bnum); } X else {-($astring cmp $bstring);} X} X X@sorted = sort nr @tosort; # sort most read to least X Xopen(MAIL,"|/bin/mail $summarypath"); X Xprint MAIL "Host\t\t$hostname\n"; Xprint MAIL "Users\t\t$users\n"; Xprint MAIL "NetReaders\t$newsreaders\n"; Xprint MAIL "ReportDate\t$dat\n"; Xprint MAIL "SystemType\tnews-perl-arbitron-2.4\n"; Xprint MAIL join("\n",@sorted), "\n"; # output the sorted data X Xclose(MAIL); END_OF_parbitron if test 4314 -ne `wc -c