Xref: utzoo alt.sources:3278 comp.lang.perl:4185 Path: utzoo!utgpu!cs.utexas.edu!bcm!dimacs.rutgers.edu!seismo!uunet!mcsun!unido!isaak!escher!nadia!smurf!flatlin!ppcger!Sepp From: Sepp@ppcger.ppc.sub.org (Josef Wolf) Newsgroups: alt.sources,comp.lang.perl Subject: Re: An AWK script to check "junk" for newsgroups Message-ID: Date: 19 Feb 91 19:41:56 GMT References: <112753@mamnix.hamm.sub.org> <~Y+&#J#@ads.com> Lines: 140 dglo@ADS.COM (Dave Glowacki) writes: ] Since, as a rule, EVERY C or shell program posted must be followed up ] by a PERL script, here's my version of NEWJUNK. Well. Fine fine. But what about using standard tools? Which *IX is _delivered_ with Perl? Now here is my version of NEWJUNK. It could have been better, but older versions of gawk have these ugly memory leaks, so you have to sort out the 'Newsgroups:' lines and pipe them into gawk :-( The awk-version will most likely be slower than the C-version and the Perl-Version, but it should run on most *IX with few modifications. This version uses 3 config files: /usr/lib/news/newjunk.active these are the newsgroups I am interested in /usr/lib/news/newjunk.trash to throw away the entire Newsgroups:-line /usr/lib/news/newjunk.junk newsgroups I don't want In the config-files you can use regular expressions. Here is my newjunk.active, for example: ---- snipp ---- # newjunk.active # # these are the newsgroups I want to have complete, if they will be # found in junk ^comp\.sys\..* ^comp\.os\..* ^comp\.mail.* ^dnet\..* ^eunet\..* ^mnet\..* ^ppc\..* ^sub\..* # I want all.sources.all .*\.sources.* # and all.os9.all .*\.os9\..* ---- snipp ---- Here goes newjunk.awk. 
Just pipe all '^Newsgroups:' into 'awk -f newjunk.awk'
---- snipp---
# newjunk.awk -- collect newsgroups seen in "junk" that should be created.
#
# Input:  the Newsgroups: header lines of the junked articles, one line per
#         record, group names separated by commas.
# Files:  /usr/lib/news/active          groups already active (never proposed)
#         /usr/lib/news/newjunk.active  patterns: propose the group on sight
#         /usr/lib/news/newjunk.trash   patterns: discard the whole line
#         /usr/lib/news/newjunk.junk    patterns: never propose the group
# Each newjunk.* file holds one regular expression per line; empty lines and
# lines starting with '#' are skipped.
# Result: newgroups[0 .. newcount-1], consumed by the END rule.

# Read the patterns stored in FILE into LIST[0 .. n-1] and return n.
# An unreadable file yields 0 patterns.
function load_patterns(file, list,    line, n) {
    n = 0;
    # The parentheses matter: an unparenthesised "getline < file > 0" is
    # ambiguous and is not parsed the same way by every awk.
    while ((getline line < file) > 0)
        if (length(line) && line !~ /^#/)
            list[n++] = line;
    close(file);
    return n;
}

BEGIN {
    # read in active; the group name is the first ":"-separated field
    FS = ":";
    # ^^^ my news-system needs this one
    active_file = "/usr/lib/news/active";
    while ((getline < active_file) > 0)
        if (length($1)) {
            active[activecount++] = $1;
            is_active[$1] = 1;      # set form of active[], for O(1) lookups
        }
    close(active_file);

    # read in config files
    nactivecount = load_patterns("/usr/lib/news/newjunk.active", nactive);
    trashcount   = load_patterns("/usr/lib/news/newjunk.trash", trash);
    junkcount    = load_patterns("/usr/lib/news/newjunk.junk", junk);

    FS = ",";   # newsgroups are separated with commas
}

# Append NG to newgroups[] unless it is already queued, already active, or
# not a plausible group name.
function insert_newsgroup(ng) {
    # The END rule places the name inside single quotes on a shell command
    # line, and the name comes from article headers: accept only characters
    # that cannot terminate that quoting (this also rejects empty names).
    if (ng !~ /^[A-Za-z0-9+_.-]+$/)
        return;

    # if newsgroup is already inserted, we can save some time
    if (ng in is_new)
        return;

    # skip newsgroup if it is already active
    if (ng in is_active)
        return;

    # insert newsgroup
    is_new[ng] = 1;
    newgroups[newcount++] = ng;
}

{
    # Accept both bare group lists and complete "Newsgroups: a,b" header
    # lines; with the prefix left in place the "^"-anchored patterns could
    # never match the first group.
    sub(/^[Nn]ewsgroups:[ \t]*/, "");

    # Candidates are collected for the whole line and inserted only after
    # every group has passed the trash check, so the list must be reset once
    # per line, not once per group.
    to_insert_count = 0;

    # check every newsgroup given in input line
    for (j = 1; j <= NF; j++) {
        ng = $j;
        gsub(/^[ \t]+|[ \t\r]+$/, "", ng);
        if (ng == "")
            continue;

        # do we want this newsgroup?
        for (i = 0; i < nactivecount; i++)
            if (ng ~ nactive[i]) {
                insert_newsgroup(ng);
                # The original had this break commented out because the
                # author's gawk crashed on it; one match is enough either
                # way since insert_newsgroup() ignores duplicates.
                break;
            }

        # is there any trash-newsgroup?  If so, drop the entire line.
        for (i = 0; i < trashcount; i++)
            if (ng ~ trash[i])
                next;

        # no trash-groups -> sort out the junk-newsgroups: the group is a
        # candidate only if it matches NONE of the junk patterns (so with an
        # empty junk list every group of a trash-free line is a candidate).
        is_junk = 0;
        for (i = 0; i < junkcount; i++)
            if (ng ~ junk[i]) {
                is_junk = 1;
                break;
            }
        if (!is_junk)
            to_insert[to_insert_count++] = ng;
    }

    # insert them now
    for (i = 0; i < to_insert_count; i++)
        insert_newsgroup(to_insert[i]);
}

END {
    for (i = 0; i < newcount; i++) {
        # insert the command for YOUR inews here
        cmd = "inews -ad=local '-c=newgroup:" newgroups[i] "'