#!/usr/local/bin/perl --					# -*-Perl-*-
eval "exec /usr/local/bin/perl -S $0 $*"
    if 0;

# Append messages or Usenet articles to a Babyl or UNIX mail file.
# Copyright (C) 1991, 1992, 1994 Free Software Foundation, Inc.
# François Pinard <pinard@iro.umontreal.ca>, March 1992.
#
# March 1989: tried an sh script to convert between message formats.
# August 1991: Perl version to convert rn, UNIX mail and Babyl to Babyl.
# March 1992: added UNIX mail format output.
# July 1994: added many inputs to a single output, added summary listing.

$usage = "\
Usage: $0 [ OPTION ] ... [INPUT] ...
  -b OUTPUT  convert all INPUT files into Babyl on OUTPUT
  -u OUTPUT  convert all INPUT files into UNIX format on OUTPUT
  -s         produce one summary line per input message
  -v         produce progress messages and dots on stderr

Options -b and -u cannot be used simultaneously.\n";

# NOTE: Perl versions previous to 4.019 might be unable to process the:
#   if (/$PATTERN_1/o || /$PATTERN_2/o || /$PATTERN_3/o || /$PATTERN_4/o)
# line, below, unless the o suffixes are removed; but at the price of an
# untolerable slowness.

# BUG: A missing `*** EOOH ***' line fools babyl in deleting the whole
# message.  Chris Moore <moore@src.bae.co.uk>, 1992-07-21.

$NOWHERE_STATE = 0;
$HEADER_STATE = 1;
$BODY_STATE = 2;
$SKIP_BABYL_STATE = 3;

#---------------------------------------------------------------------->
;# ctime.pl is a simple Perl emulation for the well known ctime(3C) function.
;#
;# Waldemar Kebsch, Federal Republic of Germany, November 1988
;# kebsch.pad@nixpbe.UUCP
;# Modified March 1990, Feb 1991 to properly handle timezones
;#  $Id: babyl2mbox,v 1.9 1996/09/08 04:04:57 friedman Exp $
;#   Marion Hakanson (hakanson@cse.ogi.edu)
#   Oregon Graduate Institute of Science and Technology
;#
;# usage:
;#
;#     #include <ctime.pl>          # see the -P and -I option in perl.man
;#     $Date = &ctime(time);

CONFIG: {
    package ctime;

    @DoW = ('Sun','Mon','Tue','Wed','Thu','Fri','Sat');
    @MoY = ('Jan','Feb','Mar','Apr','May','Jun',
	    'Jul','Aug','Sep','Oct','Nov','Dec');
}

sub ctime {
    package ctime;

    local($time) = @_;
    local($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst);

    # Determine what time zone is in effect.
    # Use GMT if TZ is defined as null, local time if TZ undefined.
    # There's no portable way to find the system default timezone.

    $TZ = defined($ENV{'TZ'}) ? ( $ENV{'TZ'} ? $ENV{'TZ'} : 'GMT' ) : '';
    ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst) =
        ($TZ eq 'GMT') ? gmtime($time) : localtime($time);

    # Hack to deal with 'PST8PDT' format of TZ
    # Note that this can't deal with all the esoteric forms, but it
    # does recognize the most common: [:]STDoff[DST[off][,rule]]

    if($TZ=~/^([^:\d+\-,]{3,})([+-]?\d{1,2}(:\d{1,2}){0,2})([^\d+\-,]{3,})?/){
        $TZ = $isdst ? $4 : $1;
    }
    $TZ .= ' ' unless $TZ eq '';

    $year += ($year < 70) ? 2000 : 1900;
    sprintf("%s %s %2d %2d:%02d:%02d %s%4d\n",
      $DoW[$wday], $MoY[$mon], $mday, $hour, $min, $sec, $TZ, $year);
}
1;
#----------------------------------------------------------------------<

$month{"Jan"} = 1;
$month{"Feb"} = 2;
$month{"Mar"} = 3;
$month{"Apr"} = 4;
$month{"May"} = 5;
$month{"Jun"} = 6;
$month{"Jul"} = 7;
$month{"Aug"} = 8;
$month{"Sep"} = 9;
$month{"Oct"} = 10;
$month{"Nov"} = 11;
$month{"Dec"} = 12;

$TODAY = &ctime (time);
chop $TODAY;

while ($ARGV[0] =~ /^-/)
{
    if ($ARGV[0] eq "-s")
    {
	$summary = 1;
	shift;
    }
    elsif ($ARGV[0] eq "-v")
    {
	$verbose = 1;
	shift;
    }
    elsif ($ARGV[0] eq "-b")
    {
	$babyl_format = 1;
	shift;
	$output_name = shift;
    }
    elsif ($ARGV[0] eq "-u")
    {
	$unix_format = 1;
	shift;
	$output_name = shift;
    }
    else
    {
	die $usage;
    }
}
die $usage if ($babyl_format && $unix_format);

if ($output_name)
{
    if (-f $output_name)
    {
	open (OUTPUT, ">>$output_name")
	    || die "$0: Cannot append to $output_name.\n";
    }
    else
    {
	open (OUTPUT, ">$output_name")
	    || die "$0: Cannot create $output_name.\n";
	if ($babyl_format)
	{
	    print STDERR "Making new Babyl file: $output_name\n" if $verbose;
	    print OUTPUT
		"BABYL OPTIONS: -*- rmail -*-\n",
		"Version: 5\n",
		"Labels:\n",
		"Note:   This is the header of an rmail file.\n",
		"Note:   If you are seeing it in rmail,\n",
		"Note:    it means the file has no messages in it.\n",
		"";
	}
    }
}

$FROM = "From [^ ]+";
$REMOTE = "remote from [^ ]+";
$DAY = "(Mon|Tue|Wed|Thu|Fri|Sat|Sun)";
$DATE = "([ 0-2][0-9]|3[01])";
$MONTH = "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)";
$YEAR = "(19)?[7-9][0-9]";
$TIME = "([ 0-1][0-9]|2[0-3]):[0-5][0-9](:[0-5][0-9])?";
$ZONE = "([A-Z][A-Z]T|[a-z][a-z]t|[-+][0-1][0-9][0-5][0-9])";

$PATTERN_1 = "^?Article [0-9]+ of [^ ]+:\$";
$PATTERN_2 = "^?$FROM +$DAY $MONTH $DATE $TIME( $ZONE)? $YEAR( $REMOTE)?\$";
$PATTERN_3 = "^?$FROM +$DAY, $DATE $MONTH $YEAR $TIME( $ZONE)?( $REMOTE)?\$";
$PATTERN_4 = "^^_?$DATE-$MONTH-$YEAR $TIME-$ZONE,[0-9]+;[0-9]+\$";

$MMDF_SEPARATOR = "\1" x 20;	# (Is it really MMDF?  I'm not sure...)

while (<>)
{
    if ($. == 1)
    {
	if ($verbose)
	{
	    printf STDERR "%s %s: ", ($babyl_format ? "Babylizing" :
				      $unix_format ? "Unixizing" : "Scanning"),
		$ARGV;
	}
	if ($summary)
	{
	    print "\n$ARGV\n";
	}
	$state = $NOWHERE_STATE;
	$counter = 0;		# converted messages counter
    }

    if (/$PATTERN_1/o || /$PATTERN_2/o || /$PATTERN_3/o || /$PATTERN_4/o)
    {
	&complete_body;
	&start_header;
    }
    elsif (/^[A-Z][a-z]+(-[A-Z][a-z]+)*:[ \t]/ && $. == 1)
    {
	&start_header;
	push (@header, $_);
    }
    elsif (/^\f$/)
    {
	&complete_body;
	$state = $SKIP_BABYL_STATE;
    }
    elsif (/^$/)
    {
	&complete_body;
    }
    elsif (/^$MMDF_SEPARATOR$/o)
    {
	&complete_body;
	&start_header;
    }
    elsif (/^\*\*\* EOOH \*\*\*$/)
    {
	&diagnostic ("$ARGV:$.: Babyl without Babyl header\n")
	    if $state != $SKIP_BABYL_STATE;
	&complete_body;
	&start_header;
    }
    elsif (/^BABYL OPTIONS:( -\*- rmail -\*-)?$/ && $. == 1)
    {
	$state = $SKIP_BABYL_STATE;
    }
    elsif (/^$/ && $state == $HEADER_STATE)
    {
	&start_body;
    }
    elsif ($state == $HEADER_STATE)
    {
	push (@header, $_);
    }
    elsif ($state == $BODY_STATE)
    {
	print OUTPUT ">" if ($unix_format && /^_?From/i);
	print OUTPUT if $output_name;
	if (/^$/)
	{
	    $blanks++;
	}
	else
	{
	    $blanks = 0;
	}
    }
    elsif ($state != $SKIP_BABYL_STATE)
    {
	&diagnostic ("$ARGV:$.: $_");
    }

    if (eof)
    {
	&complete_body;
	close ARGV;
	print STDERR "done ($counter messages)\n" if $verbose;
    }
}

close OUTPUT if $output_name;
exit 0;


### Start or complete message header or body.

sub start_header
{
    @header = ();
    $state = $HEADER_STATE;
}

sub start_body
{
    local ($line, $from, $date, $subject);

    if ($babyl_format)
    {
	print OUTPUT
	    "\f\n",
	    "0, unseen,,\n",
	    "*** EOOH ***\n",
	    @header,
	    "\n";
    }
    if ($unix_format)
    {
	$from = "unknown";
	foreach $line (@header)
	{
	    if ($line =~ /^From:[ \t]*(.*)/i)
	    {
		$from = $1;
		$from = $1 if $from =~ /<(.+)>/;
		$from =~ s/ *\(.*\) *//;
		$from =~ s/ *".*" *//;
	    }
	}
	print OUTPUT
	    "From $from $TODAY\n",
	    @header,
	    "\n";
    }
    if ($summary)
    {
	$from = "-----";
	$date = "-- --- --";
	$subject = "(none)";
	foreach $line (@header)
	{
	    if ($line =~ /^From:[ \t]*(.*)/i)
	    {
		if ($line =~ /^From:[ \t]*([^<]*)<(.*)>/i)
		{
		    $name = $1;
		    $from = $2;
		}
		elsif ($line =~ /^From:[ \t]*([^\(]*)\((.*)\)/i)
		{
		    $from = $1;
		    $name = $2;
		}
		elsif ($line =~ /^From:[ \t]*(.*)/i)
		{
		    $from = $1;
		    $name = "";
		}
		$name = $1 if $name =~ /^ *"(.*)" *$/;
		$name =~ s/^ +//;
		$name =~ s/ +$//;
		if ($name)
		{
		    while ($name =~ /=\?ISO-8859-1\?Q\?([^?]*)\?=/i)
		    {
			$before = $`;
			$after = $';
			$string = $1;
			$string =~ s/_/ /g;
			while ($string =~ /=([0-9A-F][0-9A-F])/i)
			{
			    $string = $` . pack ("H2", $1) . $';
			}
			$name = $before . $string . $after;
		    }
		    $from = $name;
		}
		else
		{
		    $from =~ s/^ +//;
		    $from =~ s/ +$//;
		}
		$from =~ s/  +/ /g;
	    }
	    elsif ($line
		   =~ /^Date:[ \t]*((19|20)[0-9][0-9]-[01][0-9]-[0-3][0-9])/i)
	    {
		$date = $1;
	    }
	    elsif ($line =~ /^Date:[ \t]*(.*)/i)
	    {
		$date = $1;
		$date =~ s/[A-Z][a-z][a-z], //;
		$date =~ s/^([1-9]) /0\1 /;
		$date =~ s/199([0-9])/9\1/;
		$date =~ s/ +[0-2]?[0-9]:.*//;
		if ($date =~ /^$DATE $MONTH $YEAR$/o)
		{
		    ($date, $month, $year) = split (' ', $date);
		    $date = sprintf ("%4d-%.2d-%.2d",
				     1900 + $year, $month{$month}, $date);
		}
		else
		{
		    $date = "....-..-..";
		}
	    }
	    elsif ($line =~ /^Subject:[ \t]*(.*)/i)
	    {
		$subject = $1;
		$subject =~ s/  +/ /g;
	    }
	}
	$line = substr (sprintf ("%2d. %s %s: %s",
				 $counter + 1, $date, $from, $subject),
			0, 79);
	$line =~ s/ +$//;
	print $line, "\n";
    }
    $state = $BODY_STATE;
    $blanks = 0;
}

sub complete_body
{
    &start_body if $state == $HEADER_STATE;
    if ($state == $BODY_STATE)
    {
	if ($babyl_format)
	{
	    print OUTPUT "";
	}
	if ($unix_format)
	{
	    print OUTPUT "\n" while $blanks++ < 2;
	}
	$counter++;
	if ($verbose)
	{
	    print STDERR ".";
	    $newline_needed = 1;
	}
    }
    $state = $NOWHERE_STATE;
}

### Take care of diagnostics.

sub diagnostic
{
    if ($newline_needed)
    {
	print STDERR "\n";
	$newline_needed = 0;
    }
    print STDERR @_;
}
