Xref: utzoo comp.mail.elm:4066 comp.lang.perl:5150 comp.unix.questions:30930 alt.sources.wanted:1238 Path: utzoo!utgpu!cunews!cognos!garyp From: garyp@cognos.UUCP (Gary Puckering) Newsgroups: comp.mail.elm,comp.lang.perl,comp.unix.questions,alt.sources.wanted Subject: Re: Parsing Mail Headers Message-ID: <9587@cognos.UUCP> Date: 1 May 91 15:10:50 GMT References: <1991Apr30.164513.391@mrspoc.Transact.COM> Reply-To: garyp@cognos.UUCP (Gary Puckering) Followup-To: comp.mail.elm Organization: Cognos Inc., Ottawa, Canada Lines: 118 In article <1991Apr30.164513.391@mrspoc.Transact.COM> steven@Transact.COM writes: >Does anyone have a program or PERL script or... that already does parsing >of mail headers? I have no doubt that this exists, and based on a >thread realized that a mail folder sorter could be written fairly easily >once the parsing is done. And why reinvent the wheel, right? Here's a script I wrote which sorts mailbox files. The one thing it doesn't do is normalize dates to GMT before sorting. Other than that, it does the job. Also included is a script which checks your mailboxes for various kinds of problems, such as messages with the same date and From lines that are not preceded by blank lines. ---------------------- cut here -------------------------------------------- #!/usr/local/bin/perl -i.bak # Sort mail folder in date sequence # Usage: # sortmail file... # # Backup saved in "file.bak". 
# Month abbreviations packed three characters apiece: the offset of a name
# within this string, divided by 3, plus 1, is its month number.
$MONTH = "JanFebMarAprMayJunJulAugSepOctNovDec";

# Read every line of every file named on the command line.  Lines are
# accumulated per message in %msg, keyed by a sortable
# "YYYY-MM-DD_HH:MM:SS" stamp built from the message's "From " separator
# line.  At the end of each input file the messages are printed in key
# order and all per-file state is reset.  Progress and diagnostics go to
# STDERR so they never mix with the sorted output.
while (<>) {
    # Separator line: "From sender Dow Mon DD HH:MM:SS [TZ] YYYY".
    if (/^From +(.+) +(\w+) +(\w+) +(\w+) +(\d+[:]\d+[:]\d+)( *\w*) +(\d+)/) {
        $from  = $1;
        $dow   = $2;
        $month = index($MONTH,$3)/3+1;
        $day   = $4;
        $time  = $5;
        $tzone = $6;        # captured but not used: dates are not normalized
        $year  = $+;        # last group that matched, i.e. the year
        $date  = sprintf("%04d-%02d-%02d_%s",$year,$month,$day,$time);

        # Announce each new input file once.
        print STDERR "\n$ARGV:\n Sorting " if $ARGV ne $oldargv;
        $oldargv = $ARGV;

        # Progress: message numbers, wrapped after every 20th.
        $msg_no++;
        print STDERR "$msg_no ";
        print STDERR "\n " if !($msg_no % 20);

        # Two messages with the same stamp share a key; the second one is
        # appended to the first below, so it is reported but not lost.
        if ($msg{$date} ne "") {
            print STDERR "\n**Duplicate message $msg_no\n->$_ ";
        }
    }

    # Every line (separator included) belongs to the message currently
    # being read.  Lines seen before the first separator have no stamp yet
    # and collect under the empty key.
    $msg{$date} .= $_;
    $map{$date} = $msg_no;
    $in_cnt += length($_);

    # Bare eof (no parentheses) is true at the end of EACH input file.
    if (eof) {
        print STDERR "\n Writing ";
        foreach $k (sort (keys %msg)) {
            $len = length($msg{$k});
            print $msg{$k};
            $out_cnt += $len;
            $msg_no = $map{$k};
            print STDERR "$msg_no ";
            print STDERR "\n " if !($msg_no % 20);
        }

        # Bytes read minus bytes written; anything but 0 means the rewrite
        # lost or gained data.  (This used to subtract two variables that
        # were never assigned, so the reported difference was always 0.)
        $delta = $in_cnt - $out_cnt;
        print STDERR "\n Byte count: input $in_cnt output $out_cnt diff $delta\n";

        # Reset per-file state before the next file is read.
        undef %msg;
        undef %map;
        undef $date;
        $in_cnt = 0;
        $out_cnt = 0;
        $msg_no = 0;
    }
}

---------------------- cut here --------------------------------------------
#!/usr/local/bin/perl
# Check sequence of mail folders
# Usage:
#     checkmail file...
# Twelve three-letter month names laid end to end; a name's position in
# the string, divided by 3, plus 1, is its month number.
$MONTH = "JanFebMarAprMayJunJulAugSepOctNovDec";

# Scan each file named on the command line and report, on STDERR, three
# kinds of trouble with its "From " separator lines: a timestamp already
# seen in the same file, a timestamp earlier than the previous message's,
# and a separator that does not follow a blank line.  The name of each
# file is printed on STDOUT when its first separator is reached.
while (<>) {
    if (/^From +(.+) +(\w+) +(\w+) +(\w+) +(\d+[:]\d+[:]\d+)( *\w*) +(\d+)/) {
        # Pull the fields out of the separator line in one go.
        ($sender, $weekday, $mon_name, $mday, $clock, $zone, $yr)
            = ($1, $2, $3, $4, $5, $6, $+);
        $mon_num = index($MONTH, $mon_name) / 3 + 1;

        # Sortable stamp; plain string comparison orders it by time.
        $stamp = sprintf("%04d-%02d-%02d_%s", $yr, $mon_num, $mday, $clock);

        if ($ARGV ne $current_file) {
            print "$ARGV:\n";
        }
        $current_file = $ARGV;
        $msg_no++;

        # Same stamp seen earlier in this file?
        print STDERR "** Duplicate message $msg_no\n-> $_"
            unless $seen{$stamp} eq "";
        $seen{$stamp} = "X";

        # Older than the message just before it?
        print STDERR "** Out of sequence at $msg_no\n-> $_"
            if $prev_stamp gt $stamp;
        $prev_stamp = $stamp;

        # The line before a separator should be empty or whitespace only.
        unless ($prev_line =~ /^\s*$/) {
            print STDERR "** From line not preceded by blank line at $msg_no\n-> $_";
        }
    }

    # Remember this line, minus its final character, for the next pass.
    chop($prev_line = $_);

    # Bare eof is true at the end of each input file: start afresh.
    if (eof) {
        undef %seen;
        ($stamp, $prev_stamp, $prev_line) = ();
        $msg_no = 0;
    }
}

-- Gary Puckering Cognos Incorporated VOICE: (613) 738-1338 x6100 P.O. Box 9707 UUCP: uunet!mitel!cunews!cognos!garyp Ottawa, Ontario INET: garyp%cognos.uucp@uunet.uu.net CANADA K1G 3Z4