Path: utzoo!utgpu!news-server.csri.toronto.edu!rpi!uwm.edu!wuarchive!psuvax1!news From: flee@cs.psu.edu (Felix Lee) Newsgroups: comp.lang.perl Subject: Re: ngmatch in perl Message-ID: <6h5G1s&y1@cs.psu.edu> Date: 26 Apr 91 09:12:59 GMT References: Sender: news@cs.psu.edu (Usenet) Organization: ~/News/org Lines: 112 Nntp-Posting-Host: dictionopolis.cs.psu.edu >Strange thing is, it is as slow as hell, about 2 match/sec on 20Mips sparc. This is because Perl is a terribly inefficient at string processing (half smiley). Below, the version of ngmatch in Perl that I use. It's at least an order of magnitude faster. The code is quite ugly. -- Felix Lee flee@cs.psu.edu #! /bin/sh # This is a shell archive. Remove anything before this line, then unpack # it by saving it into a file and typing "sh file". To overwrite existing # files, type "sh file -c". You can also feed this as standard input via # unshar, or by typing "sh 'z.pl' <<'END_OF_FILE' Xshift, $ngmatch'debug = 1 if $ARGV[0] eq '-d'; Xdo 'ngmatch.pl'; X X$pattern = "gnu,comp,news,sci,rec,misc,soc,talk,alt,to.melba,aus,melb,world,general,!talk.all,!soc.all,soc.culture.australian,soc.culture.jewish,soc.culture.arabic,soc.culture.esperanto,soc.religion.islam,soc.history,soc.men,soc.motss,soc.women,soc.feminism,!rec.all,rec.humor,rec.games.hack,rec.sport.cricket,rec.pets,rec.games.trivia,rec.arts.movies.reviews,!alt.all,alt.humor.oracle,alt.sources,alt.security,alt.sys.sun,alt.folklore,alt.religion,!sci.all,sci.space,sci.military,sci.med.aids,sci.econ,sci.env ironment,sci.energy,sci.skeptic,comp.all,!comp.binaries,!comp.all.mac,!comp.all.amiga,!comp.all.atari,!comp.mail.maps,!comp.os,!comp.sys,comp.sys.sun,comp.sys.ibm,!talk.all,all.misc"; X X&ngmatch'compile('match', $pattern); X X$| = 1; Xwhile (<>) { X if (&match($_)) { X print '+'; X } else { X print '-'; X } X} END_OF_FILE if test 874 -ne `wc -c <'z.pl'`; then echo shar: \"'z.pl'\" unpacked with wrong size! fi # end of 'z.pl' fi if test -f 'ngmatch.pl' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'ngmatch.pl'\" else echo shar: Extracting \"'ngmatch.pl'\" \(1453 characters\) sed "s/^X//" >'ngmatch.pl' <<'END_OF_FILE' Xpackage ngmatch; X X# &ngmatch_compile($name, $pattern) X# X# Create a new subroutine called $name: X# &$name($group) X# that returns true if $group matches $pattern. X# $pattern is something like "comp,!comp.sys,comp.sys.mac". X# "all" can be used to wildcard any single component, like "comp.all.mac". X# Any pattern, like "comp", will also match "comp.all", "comp.all.all", etc. X Xsub ngmatch'compile { X local($name, $pattern) = @_; X local(@n, $_, $match, $flag, $package); X X @n = split(/,/, $pattern); X X grep(( X s/\./,/g, # change periods to commas X s/([^!,\w])/\\$1/g, # quote special characters X s/(,all)$/(,[^,]*|)/, # trailing ".all" X s/$/(,.*|)$/, # implicit tail X s/^(!?)all,/\1([^,]*),/,# leading "all." X s/,all,/,([^,]*),/g, # embedded ".all." X s/,/\\./g, # change commas to quoted periods. X ), @n); X X # Now we build a series of tests. X $match = ''; X for $_ (@n) { X $flag = !s/^!//; X s/^/^/; # anchor the pattern. X $match .= '@x = /'.$_.'/;'; X $match .= "\nprint '".$_." '".',@x+0," @x\n";' if $debug; X $match .= 'if (($x = @x) && ($x *= 19) < $best) { X $x += ($x[$#x] =~ tr/././) * 20; X $result = '.$flag.', $best = $x if $x < $best; X };'; X $match .= 'print " $x $best $result\n";' if $debug; X } X X local($package) = (caller)[0]; X X $_ = ' X package '.$package.'; X sub '.$name.' { X local($_) = @_; X local($result, $best, $x, @x) = (0, 1e20); X '.$match.' X $result; X }'; X eval "$_; 1" || die "ngmatch'compile: $@"; X} END_OF_FILE if test 1453 -ne `wc -c <'ngmatch.pl'`; then echo shar: \"'ngmatch.pl'\" unpacked with wrong size! fi # end of 'ngmatch.pl' fi echo shar: End of shell archive. exit 0