Big Green CD 8

home *** CD-ROM | disk | FTP | other *** search

/ Big Green CD 8 / BGCD_8_Dev.iso / NEXTSTEP / UNIX / GNU / recode-3.4-MIHS / src / charset.pl < prev next >

Wrap

Text File | 1994-01-06 | 6.7 KB | 315 lines

# Automatically derive charset.c from rfc1345.txt.
# Copyright (C) 1993, 1994 Free Software Foundation, Inc.
# Francois Pinard <pinard@iro.umontreal.ca>, 1993.

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

# Overview.  The charset description is read from the files named on
# the command line (or standard input).  Lines starting with `&' are
# directives (&charset, &alias, &rem, &code, &duplicate, &g0esc..&g4esc);
# every other line is split on white space into tokens which fill a
# 256-entry table for the current charset.  The results are written to
# charset1.c and charset2.c (C tables plus registration functions) and
# to charset.texi (documentation).  Progress is reported on STDERR.

# The here-document is deliberately single-quoted: the text contains
# `@iro', which an interpolating here-document would treat as an array.
$overall_header = <<'END_OF_TEXT';
/* DO NOT MODIFY THIS FILE! It was generated by "charset.pl". */

/* Conversion of files between different charsets and usages.
   Copyright (C) 1990, 1993 Free Software Foundation, Inc.
   Francois Pinard <pinard@iro.umontreal.ca>, 1993.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */

#include "recode.h"
END_OF_TEXT

# Ordinal of the first charset which goes to charset2.c instead of
# charset1.c.
$split_point = 80;

$charset_ordinal = 0;		# number of charsets emitted so far
$discard_charset = 0;		# true while skipping a rejected charset
$alias_count = 0;		# aliases queued for the current charset
$header = "";			# pending C comment; empty if no charset yet

while (<>) {

    # Recognize `&charset'.

    if (/^&charset (.*)/) {

	# Before beginning a new charset, process the previous one.
	# $1 is copied first, because complete_charset does its own
	# pattern matching.

	$next_charset = $1;
	&complete_charset;
	$charset = $next_charset;

	# Save the charset name for further declaration.  Announce
	# this charset in the array initialization section, and
	# initialize its processing.

	print STDERR $charset, "...";
	$header = "\n/* $charset\n";

	# Reset the alias counter before the duplicate test.  If it
	# were left holding the count of the previous charset, discarding
	# this one would pop the previous charset's aliases from
	# @declare_alias.

	$alias_count = 0;

	# Names are compared after lower-casing and after removing
	# everything which is not a letter or a digit.

	$hashname = $charset;
	$hashname =~ tr/A-Z/a-z/;
	$hashname =~ s/[^a-z0-9]//g;
	if ($used{$hashname}) {
	    printf STDERR " duplicate of %s...", $used{$hashname};
	    $discard_charset = 1;
	    next;
	}
	$used{$hashname} = $charset;

	@table = ();
	$codedim = 0;
	$code = 0;
	$list = "";
	$body = "";
	next;
    }

    # Ignore discarded charsets, and character set escapes.

    next if $discard_charset;
    next if /^&g[0-4]esc/;

    # Recognize other `&' directives.

    if (/^&rem (.*)/) {

	# Save C comments for Texinfo.

	$body .= "$1\n";
	next;
    }

    if (/^&alias (.*)/) {

	# Save synonymous charset names for later declarations.  An
	# alias clashing with a name owned by another charset is only
	# listed in the C comment, it is not declared.

	$alias = $1;
	$header .= " $alias\n";

	$hashname = $alias;
	$hashname =~ tr/A-Z/a-z/;
	$hashname =~ s/[^a-z0-9]//g;
	if ($used{$hashname} && $used{$hashname} ne $charset) {
	    printf STDERR " duplicate of %s...", $used{$hashname};
	    next;
	}
	$used{$hashname} = $charset;

	$list .= "," if $list;
	$list .= $alias;
	push (@declare_alias, "$alias,$charset");
	$alias_count++;
	next;
    }

    if (/^&code (.*)/) {

	# Save the code position.

	$code = $1;
	next;
    }

    if (/^&duplicate/) {

	# Ignore duplicates for now.

	next;
    }

    if (/^&([^ ]+)/) {

	# This is an unrecognized & line, discard the charset.

	print STDERR " &$1...";
	$discard_charset = 1;
	next;
    }

    # Save all other tokens into the double table.  `??' and `__'
    # leave their table slot empty, yet still use up a code position.
    # $codedim tracks the longest token seen for this charset.

    foreach $token (split) {
	if ($token ne "??" && $token ne "__") {
	    $table[$code] = $token;
	    if (length ($token) > $codedim) {
		$codedim = length ($token);
	    }
	}
	$code++;
    }
}

# Process the last accumulated charset.

&complete_charset;

# NOTE(review): complete_file (1) is only called once $split_point
# charsets were emitted.  With fewer charsets, the call below writes
# module_charset2 into charset1.c and numbers the tables from
# $split_point instead of 0 -- confirm the input is always large enough.

&complete_file (2);

# Print the documentation.

open (TEXI, ">charset.texi")
    || die "charset.pl: cannot create charset.texi: $!\n";
for $charset (sort keys %body) {

    # `@' is escaped or single-quoted everywhere in this loop, so that
    # Texinfo commands are never taken for Perl arrays.

    print TEXI "\n\@item $charset\n";
    @list = sort (split (/,/, $list{$charset}));
    if (@list == 1) {
	print TEXI '@code{', $list[0], "} is an alias for this charset.\n";
    }
    elsif (@list > 0) {
	$string = '@code{' . join ('}, @code{', @list) . '}';

	# Turn the last comma into ` and'.

	$string =~ s/,([^,]+)$/ and$1/;
	print TEXI $string, " are aliases for this charset. \n";
    }
    print TEXI $body{$charset};
}
close TEXI;

exit 0;

# Routine for printing all accumulated initialization information, and
# then closing the file.  Argument is 1 or 2 depending on output file.
#
# The function module_charsetN is written on SRC, N being the argument.
# It holds one declare_double_step call per entry queued in
# @declare_charset, then one declare_alias call per entry queued in
# @declare_alias.  Both queues are emptied.  Table numbering starts at 0
# for file 1, and at $split_point otherwise.

sub complete_file {

    # Print the collectable initialization function.

    print SRC "\n";
    print SRC "void\n";
    printf SRC "module_charset%d (void)\n", $_[0];
    print SRC "{\n";

    $counter = $_[0] == 1 ? 0 : $split_point;

    while ($string = shift @declare_charset) {

	# Entries are "NAME,CODEDIM": quote the name part only.  The
	# greedy match splits at the last comma.

	$string =~ s/(.*),/"$1", /;
	print SRC " declare_double_step (&table_$counter, $string);\n";
	$counter++;
    }

    print SRC "\n";

    while ($string = shift @declare_alias) {

	# Entries are "ALIAS,NAME": the first comma becomes `", "', so
	# both parts end up as separate C strings.

	$string =~ s/,/", "/;
	print SRC " declare_alias (\"$string\");\n";
    }

    print SRC "}\n";
    close SRC;
}

# Routine for printing all accumulated information for the charset.
# If the charset should be discarded, adjust tables.
#
# It works on the globals filled by the main loop ($charset, $header,
# $list, $body, @table, $codedim), takes no argument, and always leaves
# $header empty.  Nothing is printed when no charset is pending.

sub complete_charset {
    if ($discard_charset) {

	# Withdraw the aliases already queued for this charset.

	while ($alias_count > 0) {
	    pop @declare_alias;
	    $alias_count--;
	}
	$discard_charset = 0;
	print STDERR " DISCARDED\n";
    }
    elsif ($header) {

	# Save the documentation.

	$list{$charset} = $list;
	$body{$charset} = $body;

	# Open a new source file if needed.

	if ($charset_ordinal == 0) {
	    print STDERR " writing on charset1.c...";
	    open (SRC, ">charset1.c")
		|| die "charset.pl: cannot create charset1.c: $!\n";
	    print SRC $overall_header;
	}
	elsif ($charset_ordinal == $split_point) {
	    &complete_file (1);
	    print STDERR " writing on charset2.c...";
	    open (SRC, ">charset2.c")
		|| die "charset.pl: cannot create charset2.c: $!\n";
	    print SRC $overall_header;
	}

	# Make introductory C comments.

	print SRC $header;
	print SRC "*/\n";

	# Make the table for this charset: 8 rows of 32 codes.  Each
	# token is padded with spaces up to $codedim characters, and a
	# row having no token at all is written as NULL.

	print SRC "\n";
	print SRC "static DOUBLE_TABLE table_$charset_ordinal = \n";
	print SRC " {\n";
	$code = 0;
	for ($index1 = 0; $index1 < 8; $index1++) {
	    $row = "";
	    $flag = 0;
	    for ($index2 = 0; $index2 < 32; $index2++) {
		$token = $table[$code++];

		# Compare against the empty string: a plain truth
		# test would take the token "0" for a missing one.

		$flag = 1 if $token ne "";
		$token .= " " while length ($token) < $codedim;
		$row .= $token;
	    }
	    if ($flag) {

		# Protect double quotes for the C string literal.

		$row =~ s/\"/\\\"/g;
		print SRC " \"", $row, "\",\n";
	    }
	    else {
		print SRC " NULL,\n";
	    }
	}
	print SRC " };\n";

	# Register the table.

	push (@declare_charset, "$charset,$codedim");

	print STDERR " done\n";
	$charset_ordinal++;
    }
    $header = "";
}