Xref: utzoo unix-pc.general:3339 comp.sys.att:7020 comp.sources.d:3877
Path: utzoo!censor!becker!news
From: news@becker.UUCP (UseNet News)
Newsgroups: unix-pc.general,comp.sys.att,comp.sources.d
Subject: revised "arbitron"
Message-ID: <660@becker.UUCP>
Date: 19 Jul 89 19:14:08 GMT
Organization: G. T. S., Toronto, Ontario
Lines: 266

I have decided to post a revised version of "arbitron" for News
statistics gathering due to the recent discussion about "unix-pc" and other
groups being under-reported due to bugs in the script.

--------- 8< --------- 8< --------- 8< --------- 8< --------- 8< ---------
#! /bin/sh
# @(#)arbitron 2.4.3 Mon Jun 5 05:03:55 EDT 1989
# arbitron -- this program produces rating sweeps for USENET.
#
# Usage: arbitron
#
# To use this program, edit the "configuration" section below so that the
# information is correct for your site, and then run it. It will produce a
# readership survey for your machine and mail that survey to decwrl.dec.com,
# with a cc to you.
#
# To participate in the international monthly ratings sweeps,
# run "arbitron" every month. I will run the statistics program on the first
# day of each month; it will include any report that has reached it by that
# time. To make sure your site's data is included, run the survey program no
# later than the 20th day of each month.
#
# Brian Reid, DEC Western Research Lab, reid@decwrl.dec.com
# Updated and bugfixed by
#     Spencer Thomas, U.of Utah
#     Geoff Kuenning, SAH Consulting
# Updated to work with 2.10.1 and older news systems by
#     Lindsay Cleveland, AT&T Technologies/Bell Labs
# Made to work with 16-bit address spaces by
#     Andy Walker, Maths Dept., University of Nottingham, UK
# Nagging Bourne shell bug fixed by
#     Tom Donahue, Rabbit Software Corp
# Newsgroup inclusion bug fix by
#     Bruce Becker, G.T.S.
#
# Note that the results of this program are dependent on the rate at which
# you expire news. If you are a small site that expires news rapidly, the
# results may indicate fewer active readers than you actually have.
#
# Overview of what the script does, in order:
#   1. counts the "real" accounts in /etc/passwd (UID in lowUID..highUID);
#   2. writes two awk programs into $TMPDIR (report formatter, .newsrc finder);
#   3. locates the active file (local, or fetched with rcp from NEWSHOST);
#   4. collects every readable ~/.newsrc as "group user ranges" lines;
#   5. merges those with the active file, counts readers per group, and
#      pipes the sorted report to the mailer.
#
###########################################################################
# Configuration information. Edit this section to reflect your site data.
#
TMPDIR=/tmp
NEWS=/usr/lib/news
SPOOL=/usr/spool/news

# Make a crude stab at determining the system type. If your installation has
# only one type of system, you can edit out the "if" statement and just turn
# this into an assignment statement of the correct value.
if [ -d /usr/ucb ]; then
    STYPE="bsd"
else
    STYPE="usg"
fi

# Range of /etc/passwd UID's that represent actual people (rather than
# maintenance accounts or daemons or whatever)
lowUID=100
highUID=9999

# If you aren't running a distributed news system (nntpd & rrn, usually),
# leave NEWSHOST blank. Else set it to the name of the host from which you
# can rcp a copy of the active file.
NEWSHOST=

# uucp path: {sun, hplabs, pyramid, decvax, ucbvax}!decwrl!netsurvey
# The cc goes to $USER, or to $LOGNAME when USER is not set in the environment.
summarypath="netsurvey@decwrl.dec.com ${USER:-$LOGNAME}"

# We need to find the uucp name of your host. If this code doesn't work,
# then just put it in literally like this:
#     hostname="ihnp4"
# STYPE is only ever "bsd" or "usg" (see above), so the System V arm must
# match "usg" as well as "sysv". "uuname -l" stays first there because that
# is what non-BSD systems were effectively using; the other commands are
# fallbacks for hosts without uucp.
case $STYPE in
    bsd)      cmd='hostname || uuname -l';;
    usg|sysv) cmd='uuname -l || uname -n || hostname';;
    *)        cmd='uuname -l';;
esac
hostname=`sh -c "$cmd" 2>/dev/null`

PATH=$NEWS:/usr/local/bin:/usr/ucb:/usr/bin:/bin
############################################################################
export PATH

# The merge step below relies on byte-order collation (a space sorts before
# ".", digits sort before letters), so pin the locale for sort/grep/date.
LC_ALL=C
export LC_ALL

# ---------------------------------------------------------------------------
# Remove our temporary files on any exit; make signals exit with a failure
# status (the exit then runs the cleanup trap).
trap 'rm -f "$TMPDIR"/arb.*.$$' 0
trap 'exit 1' 1 2 3 15

# Report date as month name + year, e.g. "Jul1989". Asking date for exactly
# these fields avoids depending on the layout of its default output.
dat=`date '+%b%Y'`

# The mail command line; deliberately left unquoted where it is used so that
# it splits into the mailer name and its recipient arguments.
destination="${MAILER-mail} $summarypath"

################################
# Here are several expressions, each of which figures out approximately how
# many people use this machine. Comment out all but 1 of them; pick the one
# you like best. Initially the most universal but least reliable of them is
# uncommented.
# (Only scheme #1 is present in this copy of the script.)
#
###### Scheme #1: fast but usually returns too big a number
nusers=`awk -F: '
BEGIN { N = 0 }
$3 >= '"$lowUID"' && $3 <= '"$highUID"' { N = N + 1 }
END { print N }' </etc/passwd`

################################
# Write the report-formatting awk program to a file. The program text below
# contains the placeholders NUSERS, HOSTNAME and DATE; sed fills them in with
# the values computed above while copying the here-document to the file.
sed -e "s/NUSERS/$nusers/" \
    -e "s/HOSTNAME/$hostname/" \
    -e "s/DATE/$dat/" >$TMPDIR/arb.fmt.$$ <<'DOG'
# makereport -- utility for "arbitron". Early versions were copied from a
# similar script distributed with "subscribers.sh" by Blonder, McCreery, and
# Herron.
#
BEGIN {
    grpcount = 0
    realusers = 0
}
#
# Active file line: dispose of previous group (if any), record group, and
# record first and last article numbers. Set group's reader count to none.
NF == 4 {
    if (grpname != "") {
        printf("%d %s\n", grpcount, grpname)
    }
    grpname = $1
    grpfirst = $3
    grplast = $2
    grpcount = 0
}
#
# .newsrc line. Break out the final number, which is the last article that
# has actually been read. This is a pretty good indicator of the person's
# true interest in the group. If 'lastread' for the group is a current
# (unexpired) article, record a reader for that group. Finally, record
# the user as a "real" user of the news system.
#
NF == 3 {
    if ($1 != grpname) next;
    n1 = split($3, n2, "-")
    n3 = split(n2[n1], n4, ",")
    lastread = n4[n3]
    if ((grpfirst != grplast) && (lastread >= grpfirst) &&
        (lastread <= grplast)) {
        grpcount++
        if (realuser[$2] != 1) {
            realuser[$2] = 1
            realusers++
        }
    }
}
#
# End of file. Print the report in 2 columns.
END {
    # For reorganized network, report a group even if nobody reads it.
    # This will help us keep track of where the groups propagate.
    if (grpname != "") {
        printf("%d %s\n", grpcount, grpname)
    }
    printf("9999 Host\t\t%s\n", "HOSTNAME")
    printf("9998 Users\t\t%d\n", NUSERS)
    printf("9997 NetReaders\t%d\n", realusers)
    printf("9996 ReportDate\t%s\n", "DATE")
    printf("9995 SystemType\tnews-arbitron-2.4.3\n")
}
DOG

# Write the awk program that turns /etc/passwd into a small shell script:
# for each distinct home directory (skipping "/" and empty ones) it emits a
# command that, if ~/.newsrc is readable, prints its subscribed-group lines
# rewritten from "group: ranges" to "group user ranges".
cat >$TMPDIR/arb.pwd.$$ <<'MOUSE'
BEGIN {
    seen["/"] = 1
    seen[""] = 1
}
{
    if (seen[$6] != 1) {
        printf("if [ -r %s/.newsrc ] ; then ", $6)
        printf("sed -n '/: [0-9]/s/:/ %s/p' <%s/.newsrc; fi\n", $1, $6)
        seen[$6] = 1
    }
}
MOUSE

# First, make sure we have an active file
if [ -z "$NEWSHOST" ]; then
    ACTIVE=$NEWS/active
else
    # Keep the fetched copy under $TMPDIR with the arb.*.$$ name pattern so
    # the cleanup trap removes it.
    ACTIVE=$TMPDIR/arb.active.$$
    rcp $NEWSHOST:$NEWS/active $ACTIVE
fi
if [ ! -s $ACTIVE ]; then
    echo "arbitron: ACTIVE file missing or empty. Cannot continue." >&2
    exit 1
fi

# Next, get the list of .newsrc files with duplicates and unreadable files
# removed. The awk program prints shell commands, so its output is run by sh
# and the collected .newsrc lines are saved for the merge below.
awk -F: -f $TMPDIR/arb.pwd.$$ </etc/passwd | sh >$TMPDIR/arb.tmp.$$

# Check to make sure that we found some
if [ -s $TMPDIR/arb.tmp.$$ ]; then
    # See if "active" file has 4 fields or only two (pre-2.10.2)
    set `sed 1q < $ACTIVE`
    if [ $# -eq 2 ]; then
        # Old two-field active file: synthesize the missing "first article"
        # and flag fields so every active line has 4 fields. The first
        # article is the lowest all-numeric file name in the group's spool
        # directory, or the last article number when there is none.
        grep '^[a-z][-0-9_a-z]*\.' $ACTIVE |
        while read group last; do
            dir=`echo "$group" | sed 's;\.;/;g'`
            first=`ls $SPOOL/$dir 2>/dev/null |
                grep '^[0-9][0-9]*$' | sort -n | sed 1q`
            echo "$group $last ${first:-$last} X"
        done
    else
        grep '^[a-z][-0-9_a-z]*\.' $ACTIVE
    fi |
    # Merge active lines with the .newsrc lines; for each group the active
    # line (numeric second field) sorts ahead of its .newsrc lines.
    sort - $TMPDIR/arb.tmp.$$ |
    awk -f $TMPDIR/arb.fmt.$$ |
    # Highest counts first; the 999x keys put the header lines on top, and
    # the final sed drops "to.*" groups and strips those 999x keys.
    sort -nr |
    sed -e '/^$/d' \
        -e '/^[0-9]* to\./d' \
        -e 's/^999[0-9] //' |
    $destination
else
    echo "Unable to find any readable .newsrc files" >&2
    exit 1
fi
--------- 8< --------- 8< --------- 8< --------- 8< --------- 8< ---------

Cheers,
-- 
Bruce Becker Toronto, Ont.
Internet: news@becker.UUCP UUCP: ...!uunet!mnetor!becker!news
"'Comefrom' considered useful" - the Anti-Dijkstra