Path: utzoo!utgpu!cs.utexas.edu!usc!ucsd!ucbvax!bloom-beacon!eru!hagbard!sunic!nuug!sigyn.idt.unit.no!news From: Harald.Eikrem@elab-runit.sintef.no Newsgroups: alt.sources Subject: Re: Kill file for mail. Keywords: Perl sorting unix mailbox Message-ID: <90Nov8.013502*Harald.Eikrem@elab-runit.sintef.no> Date: 8 Nov 90 00:35:02 GMT References: <1990Nov06.052117.26136@ddsw1.MCS.COM> <2258@exodus.Eng.Sun.COM> Sender: news@idt.unit.no (Usenet news admin) Organization: ELAB-RUNIT (SINTEF), Trondheim, Norway. Lines: 168 In-Reply-To: falk@peregrine.Eng.Sun.COM's message of 7 Nov 90 00:18:01 GMT Here's another approach to sorting mailboxes. Perl based, what else? There's no separate man page. It does not pipe messages to programs, because it is strictly a mail sorter. True improvements are of course welcomed. --Harald E ++++Perl script follows, save as `mailsort' in your favourite bin directory++++ #!/usr/local/bin/perl # Usage "mailsort [ MAILBOX... ]" Requires perl 3.0 patchlevel 4 up. # Sorts Unix mailbox file(s) into mailbox folders at ~/Mail/, # default input file is /usr{/spool,}/mail/. # Produces two kinds of folder summaries, one building up for each # folder (~/Mail/.sum/), and another about all new messages # from the last sort (~/Mail/.sum/NEW). # The sorting is based on a sort file (~/Mail/.fsort) # using the following conventions: # - Each line should contain a keyword and a folder name. # - The keyword and folder tokens are separated by one or more . # - A blank character in the keyword matches any number of whitespaces. # - Keyword matches are always case-INsensitive. # - Regular expressions are valid, but take care. Not everything is # going to work. # - Key order might be significant. # - Mail header reserved words should only be preceeded by a circumflex # (^) and end in a colon (:). There is no need to insert random string # matches in beetween the MHRW and the matching string, a single # space is suitable, e.g. "^From: henry@vcuhenry" # will match a From: header field with "henry@vcu" anywhere on that # line, the message is written to the folder named "henry". # "^To:" will match both To: and Cc: header records. # - Mail header continuation lines are not recognised on a regular basis. # - Mail header trace lines are not rendered in the folder decisions. # - Any other kind of keyword will match *anything* in the mail header. # - Default folder name is "inbox". # # Written 5. Feb 90 by Harald Eikrem, SINTEF, Trondheim, Norway. # (Some ideas lent from `from' script by Johan Vromans). # # PS. I am sure someone can optimise this algorithm. A perl novice I am. # # Modified 8. Feb 90 -- redesigned key struct # Modified 21. Mar 90 -- modified "^To:" keyword to match both To: and Cc:. # Modified 1. Apr 90 -- now handles absolute or relative folder file # referencing, e.g. /dev/null (this is no joke) if ( $#ARGV < 0 ) { if ( ! ($user = getlogin)) { @a = getpwuid($<); $user = $a[0]; } if ( -r "/usr/spool/mail/$user" ) { @ARGV = ("/usr/spool/mail/$user"); } elsif ( -r "/usr/mail/$user" ) { @ARGV = ("/usr/mail/$user"); } else { printf STDERR "No mail for $user.\n"; exit 1; } } $Maildir = $ENV{"HOME"}."/Mail"; if ( ! -d $Maildir ) { printf STDERR "No ~/Mail directory.\n"; exit 1; } $fsort = ".fsort"; if ( ! -r "$Maildir/$fsort" ) { printf STDERR "No sort file ~/Mail/$fsort\n"; exit 1; } # read sort file open(srt, "<$Maildir/$fsort"); for ($index = 1; $line = ; ++$index ) { chop($line); next if $line =~ /^#/ || $line =~ /^\s*$/; $line =~ /^([^\t]+)\t+(\S+)\s*$/; $key[$index] = $1; $val[$index] = $2 ? $2 : "inbox"; $key[$index] =~ s/^\^?([\w-]+\s*:)\s*(.+)/^$1[^\\n]*$2/; $key[$index] =~ s/\s+/\\s+/g; $key[$index] =~ s/(\[[^\]]*)\\s+/$1 /g; $key[$index] =~ s/\\\\s\+?/ /g; $key[$index] =~ s/^\^/\\n/; if (/^(\n)To(:.*)/i) { $key[++$index] = s//$1Cc$2/; } # printf "%3d KEY = %-42s VAL = %-s\n",$index,$key[$index],$val[$index]; } close(srt); $MAXKEY = $index-1; if ( ! -d "$Maildir/.sum" ) { mkdir("$Maildir/.sum", 0755); } # go through input file(s) while ( $line = <> ) { # scan until "From_" header found if ( $line !~ /^From\s+(\S+)\s+[^\n]*(\w{3}\s+\d+\s+\d+:\d+)/ ) { print fld $line if $folder; next; } $from = $1; $date = $2; $full_header = $line; $header = $line; $Recline = 0; # get user name from uucp path $from = $1 if $from =~ /.*!([^\n]+)/; # now, scan for Subject or empty line $subj = ""; while ( $line = <> ) { if ( $line =~ /^$/ ) { # force fall-though $subj = "(none)" unless $subj; last; } $full_header .= $line; # Skip trace header lines if ($Recline && $line !~ /^\s/) { $Recline = 0; } if ($Recline || $line =~ /^Received\s*:/) { $Recline = 1; } else { $header .= $line; } $subj = $1 if $line =~ /^Subject\s*:\s*([^\n]*)/; if ( $line =~ /^From\s*:\s*/ ) { $line = $'; if ( $line =~ /\(([^\n]+)\)/ ) { $from = $1; } elsif ( $line =~ /^"?([^<\n"]+)"?\s*<[^\n]+>/ ) { $from = $1; } elsif ( $line =~ /^\n]+)>?/ ) { $from = $1; } } } $folder = ""; study($header); for ($index = 1; !$folder && $index <= $MAXKEY; ++$index ) { $folder = $val[$index] if $header =~ /$key[$index]/i; } $folder = "inbox" unless $folder; $summary{$folder} .= sprintf(" %-12.12s \"%-17.17s\" %-.45s\n", $date,$from,$subj); close(fld); if ($folder =~ m#^[/.]#) { open(fld, ">>$folder"); } else { open(fld, ">>$Maildir/$folder"); } print fld $full_header,"\n"; } close(fld); open(new, ">>$Maildir/.sum/NEW"); while ( ($folder,$sumtext) = each(%summary) ) { print new $folder,":\n",$sumtext; open(sum, ">>$Maildir/.sum/$folder"); print sum $sumtext; close(sum); } close(new);