package Pod::IsoPod;

=head1 NAME

Pod::IsoPod - converts ISO-8859-1 characters to pod Escapes and back again.

=head1 SYNOPSIS

    use Pod::IsoPod;
    use FileHandle;

    my $InFile = new FileHandle;
    my $OutFile = new FileHandle;
    open ($InFile, '<' . $ARGV[0]);
    open ($OutFile, '>' . $ARGV[1]); # Error checking is left as an exercise
    iso2ascii($InFile, $OutFile);    # ISO8859 characters to Pod Escapes
    ascii2iso($InFile, $OutFile);    # Pod Escapes to ISO8859 characters

=head1 DESCRIPTION

Pod::IsoPod is a module that can be used to convert pod files written
with an editor that supports the ISO-8859-1 character set to 7bit ascii
text or vice versa.

Each ISO-8859-1 character is converted to an HTML Escape string. These strings
are understood by the pod2xxx programs.

=head1 SEE ALSO

    asciipod2isopod - program that converts ascii pods to iso8859 character set
    isopod2asciipod - program that converts iso8859 pods to ascii character set

=head1 AUTHOR

Simon Washbrook E<lt>F<SimonWashbrook@compuserve.com>E<gt>

=head1 TODO

    All of it.

=cut

require Exporter;
@ISA = Exporter;
@EXPORT = qw(iso2ascii ascii2iso);

#use strict;
#use FileHandle;

# Stolen from Pod::Text.pm by Tom Christiansen
#
# Maps each escape name (the "name" in E<name>) to the single ISO-8859-1
# character it stands for.  Every value must be distinct: this table is
# also inverted to look names up by character, and a duplicated value
# would make that lookup ambiguous.
my %HTML_Escapes = (
    "Aacute"   => "\xC1", #   capital A, acute accent
    "aacute"   => "\xE1", #   small a, acute accent
    "Acirc"    => "\xC2", #   capital A, circumflex accent
    "acirc"    => "\xE2", #   small a, circumflex accent
    "AElig"    => "\xC6", #   capital AE diphthong (ligature)
    "aelig"    => "\xE6", #   small ae diphthong (ligature)
    "Agrave"   => "\xC0", #   capital A, grave accent
    "agrave"   => "\xE0", #   small a, grave accent
    "Aring"    => "\xC5", #   capital A, ring
    "aring"    => "\xE5", #   small a, ring
    "Atilde"   => "\xC3", #   capital A, tilde
    "atilde"   => "\xE3", #   small a, tilde
    "Auml"     => "\xC4", #   capital A, dieresis or umlaut mark
    "auml"     => "\xE4", #   small a, dieresis or umlaut mark
    "Ccedil"   => "\xC7", #   capital C, cedilla
    "ccedil"   => "\xE7", #   small c, cedilla
    "Eacute"   => "\xC9", #   capital E, acute accent
    "eacute"   => "\xE9", #   small e, acute accent
    "Ecirc"    => "\xCA", #   capital E, circumflex accent
    "ecirc"    => "\xEA", #   small e, circumflex accent
    "Egrave"   => "\xC8", #   capital E, grave accent
    "egrave"   => "\xE8", #   small e, grave accent
    "ETH"      => "\xD0", #   capital Eth, Icelandic
    "eth"      => "\xF0", #   small eth, Icelandic
    "Euml"     => "\xCB", #   capital E, dieresis or umlaut mark
    "euml"     => "\xEB", #   small e, dieresis or umlaut mark
    "Iacute"   => "\xCD", #   capital I, acute accent
    "iacute"   => "\xED", #   small i, acute accent
    "Icirc"    => "\xCE", #   capital I, circumflex accent
    "icirc"    => "\xEE", #   small i, circumflex accent
    "Igrave"   => "\xCC", #   capital I, grave accent
    "igrave"   => "\xEC", #   small i, grave accent
    "Iuml"     => "\xCF", #   capital I, dieresis or umlaut mark
    "iuml"     => "\xEF", #   small i, dieresis or umlaut mark
    "Ntilde"   => "\xD1", #   capital N, tilde
    "ntilde"   => "\xF1", #   small n, tilde
    "Oacute"   => "\xD3", #   capital O, acute accent
    "oacute"   => "\xF3", #   small o, acute accent
    "Ocirc"    => "\xD4", #   capital O, circumflex accent
    "ocirc"    => "\xF4", #   small o, circumflex accent
    "Ograve"   => "\xD2", #   capital O, grave accent
    "ograve"   => "\xF2", #   small o, grave accent
    "Oslash"   => "\xD8", #   capital O, slash
    "oslash"   => "\xF8", #   small o, slash
    "Otilde"   => "\xD5", #   capital O, tilde
    "otilde"   => "\xF5", #   small o, tilde
    "Ouml"     => "\xD6", #   capital O, dieresis or umlaut mark
    "ouml"     => "\xF6", #   small o, dieresis or umlaut mark
    "szlig"    => "\xDF", #   small sharp s, German (sz ligature)
    "THORN"    => "\xDE", #   capital THORN, Icelandic
    "thorn"    => "\xFE", #   small thorn, Icelandic
    "Uacute"   => "\xDA", #   capital U, acute accent
    "uacute"   => "\xFA", #   small u, acute accent
    "Ucirc"    => "\xDB", #   capital U, circumflex accent
    "ucirc"    => "\xFB", #   small u, circumflex accent
    "Ugrave"   => "\xD9", #   capital U, grave accent
    "ugrave"   => "\xF9", #   small u, grave accent
    "Uuml"     => "\xDC", #   capital U, dieresis or umlaut mark
    "uuml"     => "\xFC", #   small u, dieresis or umlaut mark
    "Yacute"   => "\xDD", #   capital Y, acute accent
    "yacute"   => "\xFD", #   small y, acute accent
    "yuml"     => "\xFF", #   small y, dieresis or umlaut mark

    "lchevron" => "\xAB", #   left chevron (double less than)
    "rchevron" => "\xBB", #   right chevron (double greater than)
);

# Build the inverse lookup: ISO-8859-1 character => escape name.
my %Escapes_To_HTML;
foreach my $Name (keys %HTML_Escapes)
{
    my $Char = $HTML_Escapes{$Name};
    $Escapes_To_HTML{$Char} = $Name;
}

###########################################################################
# iso2ascii($InFile, $OutFile)
#
# Read $InFile line by line and print every line to $OutFile, replacing
# each 8 bit character (octal 200-377) that has an entry in
# %Escapes_To_HTML with the escape sequence E<name>.
#
#   $InFile  - filehandle the lines are read from
#   $OutFile - filehandle the converted lines are printed to
#
# A character without an entry is left as it is and reported with warn.
# Lines that begin with whitespace are printed unchanged (presumably
# because they are verbatim pod text - confirm against the pod spec).
#
# Algorithm adapted from Pod::Text.pm by Tom Christiansen
sub iso2ascii
{
    my $InFile  = shift;
    my $OutFile = shift;

    # Read into a lexical: "while (<FH>)" assigns to the global $_ without
    # localizing it, which would clobber the caller's $_.
    while (defined(my $Line = <$InFile>))
    {
        if ($Line !~ m/^\s+/)
        {
            $Line =~ s{([\200-\377])}
                      {
                          defined $Escapes_To_HTML{$1}
                              ? "E<" . $Escapes_To_HTML{$1} . ">"
                              : do {
                                    warn "Unknown 8 bit character \"$1\" in $Line";
                                    $1;
                                }
                      }eg;
        }
        print $OutFile $Line;
    }
}


###########################################################################
# ascii2iso($InFile, $OutFile)
#
# Read $InFile line by line and print every line to $OutFile, replacing
# each escape sequence E<name> whose name has an entry in %HTML_Escapes
# with the character stored there.
#
#   $InFile  - filehandle the lines are read from
#   $OutFile - filehandle the converted lines are printed to
#
# Escapes with any other name are left exactly as they were.
#
# NOTE(review): unlike iso2ascii, lines that begin with whitespace are
# converted as well - confirm whether that asymmetry is intended.
#
# Algorithm adapted from Pod::Text.pm by Tom Christiansen
sub ascii2iso
{
    my $InFile  = shift;
    my $OutFile = shift;

    # Read into a lexical: "while (<FH>)" assigns to the global $_ without
    # localizing it, which would clobber the caller's $_.
    while (defined(my $Line = <$InFile>))
    {
        $Line =~ s{E<([A-Za-z]+)>}
                  {
                      defined $HTML_Escapes{$1}
                          ? $HTML_Escapes{$1}
                          : "E<$1>"    # Leave all others alone
                  }eg;
        print $OutFile $Line;
    }
}

1;
