# encodings.pl - Download IANA text and compute alias list. # Assumes you are running this program from gnu/gcj/convert/. # Output suitable for direct inclusion in IOConverter.java. # Map IANA canonical names onto our canonical names. %map = ( 'ANSI_X3.4-1968' => 'ASCII', 'ISO_8859-1:1987' => '8859_1', 'UTF-8' => 'UTF8', 'Shift_JIS' => 'SJIS', 'Extended_UNIX_Code_Packed_Format_for_Japanese' => 'EUCJIS', 'UTF16-LE' => 'UnicodeLittle', 'UTF16-BE' => 'UnicodeBig' ); if ($ARGV[0] eq '') { $file = 'character-sets'; if (! -f $file) { # Too painful to figure out how to get Perl to do it. system 'wget -o .wget-log http://www.iana.org/assignments/character-sets'; } } else { $file = $ARGV[0]; } # Include canonical names in the output. foreach $key (keys %map) { $output{lc ($key)} = $map{$key}; } open (INPUT, "< $file") || die "couldn't open $file: $!"; $body = 0; $current = ''; while () { chop; $body = 1 if /^Name:/; next unless $body; if (/^$/) { $current = ''; next; } ($type, $name) = split (/\s+/); # Encoding names are case-insensitive. We do all processing on # the lower-case form. my $lower = lc ($name); if ($type eq 'Name:') { $current = $map{$name}; if ($current) { $output{$lower} = $current; } } elsif ($type eq 'Alias:') { # The IANA list has some ugliness. if ($name ne '' && $lower ne 'none' && $current) { $output{$lower} = $current; } } } close (INPUT); foreach $key (sort keys %output) { print " hash.put (\"$key\", \"$output{$key}\");\n"; }