home *** CD-ROM | disk | FTP | other *** search
- # Automatically derive charset.c from rfc1345.txt.
- # Copyright (C) 1993, 1994 Free Software Foundation, Inc.
- # Francois Pinard <pinard@iro.umontreal.ca>, 1993.
-
- # This program is free software; you can redistribute it and/or modify
- # it under the terms of the GNU General Public License as published by
- # the Free Software Foundation; either version 2, or (at your option)
- # any later version.
-
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
-
- # You should have received a copy of the GNU General Public License
- # along with this program; if not, write to the Free Software
- # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
# Boilerplate written at the top of every generated C file.
#
# The here-document is single-quoted on purpose: nothing in it is meant to
# be interpolated, and with a double-quoted terminator Perl 5 reads the
# `@iro' of the e-mail address as an array, which either vanishes from the
# generated header or stops compilation under `use strict'.

$overall_header = <<'END_OF_TEXT';
/* DO NOT MODIFY THIS FILE! It was generated by "charset.pl". */

/* Conversion of files between different charsets and usages.
Copyright (C) 1990, 1993 Free Software Foundation, Inc.
Francois Pinard <pinard@iro.umontreal.ca>, 1993.

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#include "recode.h"
END_OF_TEXT

# Ordinal of the first charset table that goes to charset2.c; tables with a
# smaller ordinal are written to charset1.c.

$split_point = 80;

# Parsing state shared with complete_charset and complete_file.

$charset_ordinal = 0;           # ordinal of the next table_N to be written
$discard_charset = 0;           # true while the current charset is skipped
$alias_count = 0;               # aliases queued for the current charset

# Pending C comment for the current charset; it stays empty until the first
# `&charset' line, which is how complete_charset knows there is work to do.

$header = "";
-
# Main loop: read the charset descriptions line by line.
#
# `&charset' starts a new charset, after flushing the previous one through
# complete_charset.  `&rem', `&alias' and `&code' are recorded, `&duplicate'
# and the `&gNesc' escapes are ignored, and any other `&' directive causes
# the whole current charset to be discarded.  Every remaining line is a row
# of mnemonics stored into @table at consecutive code positions.

while (<>) {

    # Recognize `&charset'.

    if (/^&charset (.*)/) {

        # Before beginning a new charset, process the previous one.  The
        # capture is copied first so that it survives the call.

        my $next_charset = $1;
        complete_charset();
        $charset = $next_charset;

        # Save the charset name for further declaration.  Announce
        # this charset in the array initialization section, and
        # initialize its processing.

        print STDERR $charset, "...";

        $header = "\n/* $charset\n";

        # No alias has been queued for the new charset yet.  This must be
        # reset before the duplicate check below: complete_charset pops
        # $alias_count entries off @declare_alias when a charset is
        # discarded, and a stale count would make a discarded duplicate
        # remove the aliases of the valid charset that preceded it.

        $alias_count = 0;

        my $key = _charset_key($charset);
        if ($used{$key}) {
            printf STDERR " duplicate of %s...", $used{$key};
            $discard_charset = 1;
            next;
        }
        $used{$key} = $charset;

        @table = ();
        $codedim = 0;
        $code = 0;
        $list = "";
        $body = "";
        next;
    }

    # Ignore discarded charsets, and character set escapes.

    next if $discard_charset;
    next if /^&g[0-4]esc/;

    # Recognize other `&' directives.

    if (/^&rem (.*)/) {

        # Save the remark; it ends up in the Texinfo documentation.

        $body .= "$1\n";
        next;
    }

    if (/^&alias (.*)/) {

        # Save synonymous charset names for later declarations.  The alias
        # is listed in the C comment even when it turns out to be taken.

        my $alias = $1;
        $header .= " $alias\n";

        my $key = _charset_key($alias);
        if ($used{$key} && $used{$key} ne $charset) {
            printf STDERR " duplicate of %s...", $used{$key};
            next;
        }
        $used{$key} = $charset;

        $list .= "," if $list;
        $list .= $alias;
        push(@declare_alias, "$alias,$charset");
        $alias_count++;
        next;
    }

    if (/^&code (.*)/) {

        # Save the code position at which the next mnemonic is stored.

        $code = $1;
        next;
    }

    if (/^&duplicate/) {

        # Ignore duplicates for now.

        next;
    }

    if (/^&([^ ]+)/) {

        # This is an unrecognized & line, discard the charset.

        print STDERR " &$1...";
        $discard_charset = 1;
        next;
    }

    # Save all other tokens into the double table.  `??' and `__' leave the
    # position empty but still advance the code position.  $codedim tracks
    # the longest mnemonic seen for this charset.

    for my $token (split) {
        if ($token ne "??" && $token ne "__") {
            $table[$code] = $token;
            $codedim = length($token) if length($token) > $codedim;
        }
        $code++;
    }
}

# Reduce a charset or alias name to the key used for duplicate detection in
# %used: lower case, with everything but ASCII letters and digits removed.

sub _charset_key {
    my ($name) = @_;

    $name =~ tr/A-Z/a-z/;
    $name =~ s/[^a-z0-9]//g;
    return $name;
}
-
# Process the last accumulated charset, then finish the C file in progress.

complete_charset();
complete_file(2);

# Print the documentation: one @item per retained charset, a sentence naming
# its aliases (if any), then the remarks collected for it.
#
# Every `@' of the Texinfo markup is escaped.  Left bare inside double
# quotes, Perl 5 reads `@item' as an array and `@code{' as the start of a
# slice, so the markup is lost or the script does not even parse.

open(TEXI, '>', 'charset.texi')
    or die "charset.pl: cannot write charset.texi: $!\n";

for my $name (sort keys %body) {
    print TEXI "\n\@item $name\n";

    my @aliases = map { "\@code{$_}" } sort(split(/,/, $list{$name}));
    if (@aliases == 1) {
        print TEXI $aliases[0], " is an alias for this charset.\n";
    }
    elsif (@aliases > 1) {

        # "A, B and C": commas between all entries but the last two.

        my $last = pop @aliases;
        print TEXI join(", ", @aliases), " and $last",
            " are aliases for this charset. \n";
    }

    print TEXI $body{$name};
}

# Buffered write errors only show up when the handle is closed.

close(TEXI)
    or die "charset.pl: error writing charset.texi: $!\n";

exit 0;
-
# Routine for printing all accumulated initialization information, and
# then closing the file.  Argument is 1 or 2 depending on output file.
#
# Writes the C function `module_charsetN' (N being the argument) to SRC.
# The function contains one declare_double_step call per entry queued in
# @declare_charset and one declare_alias call per entry queued in
# @declare_alias.  Both queues are left empty.  Tables are numbered from 0
# for file 1 and from $split_point for any other argument.  Dies if SRC
# cannot be closed cleanly, since that is where buffered write errors
# surface.

sub complete_file {
    my ($module) = @_;

    # Print the collectable initialization function.

    print SRC "\n";
    print SRC "void\n";
    printf SRC "module_charset%d (void)\n", $module;
    print SRC "{\n";

    my $counter = $module == 1 ? 0 : $split_point;
    while (@declare_charset) {

        # Entries read "NAME,CODEDIM"; quote the name, which is everything
        # before the last comma.

        my $entry = shift @declare_charset;
        $entry =~ s/(.*),/"$1", /;
        print SRC " declare_double_step (&table_$counter, $entry);\n";
        $counter++;
    }

    print SRC "\n";
    while (@declare_alias) {

        # Entries read "ALIAS,NAME"; the first comma separates the two.

        my $entry = shift @declare_alias;
        $entry =~ s/,/", "/;
        print SRC " declare_alias (\"$entry\");\n";
    }

    print SRC "}\n";
    close(SRC)
        or die "charset.pl: error writing charset$module.c: $!\n";
}
-
# Routine for printing all accumulated information for the charset.
# If the charset should be discarded, adjust tables.
#
# Works entirely on the globals filled by the main loop ($charset, $header,
# $list, $body, @table, $codedim, $alias_count, $discard_charset):
#
#  - a discarded charset only has its queued aliases removed from
#    @declare_alias;
#  - otherwise, provided a charset was started ($header is not empty), its
#    documentation is saved in %list and %body, its table is written to
#    SRC as `table_N' and the charset is queued in @declare_charset.
#
# charset1.c is opened for the first table, and charset2.c once
# $split_point tables have been written.  $header is always left empty.

sub complete_charset {
    if ($discard_charset) {

        # Take back the aliases queued for the discarded charset.

        pop @declare_alias while $alias_count-- > 0;
        $alias_count = 0;
        $discard_charset = 0;
        print STDERR " DISCARDED\n";
    }
    elsif ($header) {

        # Save the documentation.

        $list{$charset} = $list;
        $body{$charset} = $body;

        # Open a new source file if needed.

        if ($charset_ordinal == 0) {
            print STDERR " writing on charset1.c...";
            open(SRC, '>', 'charset1.c')
                or die "charset.pl: cannot write charset1.c: $!\n";
            print SRC $overall_header;
        }
        elsif ($charset_ordinal == $split_point) {
            complete_file(1);

            print STDERR " writing on charset2.c...";
            open(SRC, '>', 'charset2.c')
                or die "charset.pl: cannot write charset2.c: $!\n";
            print SRC $overall_header;
        }

        # Make introductory C comments.

        print SRC $header;
        print SRC "*/\n";

        # Make the table for this charset: 8 rows of 32 code positions,
        # every mnemonic padded with spaces to $codedim characters.  A row
        # without any mnemonic is written as NULL.

        print SRC "\n";
        print SRC "static DOUBLE_TABLE table_$charset_ordinal = \n";
        print SRC " {\n";
        for my $row (0 .. 7) {
            my $text = "";
            my $row_in_use = 0;
            for my $column (0 .. 31) {
                my $token = $table[32 * $row + $column];

                # Test for presence rather than truth: the mnemonic of the
                # digit zero is the string "0", which is false in Perl.

                if (defined $token && length $token) {
                    $row_in_use = 1;
                }
                else {
                    $token = "";
                }
                $token .= " " while length($token) < $codedim;
                $text .= $token;
            }
            if ($row_in_use) {
                $text =~ s/"/\\"/g;
                print SRC " \"", $text, "\",\n";
            }
            else {
                print SRC " NULL,\n";
            }
        }
        print SRC " };\n";

        # Register the table.

        push(@declare_charset, "$charset,$codedim");

        print STDERR " done\n";
        $charset_ordinal++;
    }
    $header = "";
}
-