home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Mega Top 1
/
os2_top1.zip
/
os2_top1
/
APPS
/
TEKST
/
GRECODE
/
CHARSET.PL
< prev
next >
Wrap
Text File
|
1993-12-19
|
6KB
|
289 lines
# Automatically derive charset.c from rfc1345.txt.
# Copyright (C) 1993 Free Software Foundation, Inc.
# Francois Pinard <pinard@iro.umontreal.ca>, 1993.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# Create the generated C header and start it with the "do not modify"
# banner and the license notice.  Stop at once if the file cannot be
# created, since every later `print HDR' would otherwise be lost silently.
open (HDR, ">charset.h") || die "charset.pl: cannot create charset.h: $!\n";

# The terminator is single-quoted so that the text is copied literally.
# With an unquoted terminator the text is interpolated like a double-quoted
# string, and the `@iro' in the e-mail address is taken for an array.
print HDR <<'END_OF_TEXT';
/* DO NOT MODIFY THIS FILE! It was generated by "charset.pl". */
/* Conversion of files between different charsets and usages.
Copyright (C) 1990, 1993 Free Software Foundation, Inc.
Francois Pinard <pinard@iro.umontreal.ca>, 1993.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
END_OF_TEXT
# Parser state shared with complete_charset.
$charset_ordinal = 0;           # numbers the emitted `table_N' variables
$discard_charset = 0;           # nonzero while the current charset is skipped
$alias_count = 0;               # aliases queued so far for the current charset
$header = "";                   # pending C comment; empty when nothing is open

# Read the charset description given on the command line (or stdin) one
# line at a time.  Lines starting with `&' are directives; every other line
# is a list of whitespace-separated tokens filling consecutive code
# positions of the current charset.
while (<>) {
    # Recognize `&charset'.
    if (/^&charset (.*)/) {
        # Before beginning a new charset, process the previous one.
        $next_charset = $1;
        &complete_charset();
        $charset = $next_charset;

        # Save the charset name for further declaration.  Announce
        # this charset in the array initialization section, and
        # initialize its processing.
        print STDERR $charset, "...";
        $header = "\n/* $charset\n";

        # The alias counter must be cleared before the duplicate check:
        # a duplicate charset is discarded, and complete_charset pops
        # $alias_count entries from @declare_alias when discarding.  A
        # stale count would remove the aliases of the previous charset.
        $alias_count = 0;

        # Charset names are compared case-insensitively, ignoring
        # everything but letters and digits.
        $hashname = $charset;
        $hashname =~ tr/A-Z/a-z/;
        $hashname =~ s/[^a-z0-9]//g;
        if ($used{$hashname}) {
            printf STDERR " duplicate of %s...", $used{$hashname};
            $discard_charset = 1;
            next;
        }
        $used{$hashname} = $charset;

        @table = ();
        $codedim = 0;
        $code = 0;
        $list = "";
        $body = "";
        next;
    }

    # Ignore discarded charsets, and character set escapes.
    next if $discard_charset;
    next if /^&g[0-4]esc/;

    # Recognize other `&' directives.
    if (/^&rem (.*)/) {
        # Save remarks for the Texinfo documentation.
        $body .= "$1\n";
        next;
    }

    if (/^&alias (.*)/) {
        # Save synonymous charset names for later declarations.
        $alias = $1;
        $header .= " $alias\n";
        $hashname = $alias;
        $hashname =~ tr/A-Z/a-z/;
        $hashname =~ s/[^a-z0-9]//g;
        if ($used{$hashname} && $used{$hashname} ne $charset) {
            # The name already belongs to another charset; skip it.
            printf STDERR " duplicate of %s...", $used{$hashname};
            next;
        }
        $used{$hashname} = $charset;
        $list .= "," if $list;
        $list .= $alias;
        push (@declare_alias, "$alias,$charset");
        $alias_count++;
        next;
    }

    if (/^&code (.*)/) {
        # Save the code position.
        $code = $1;
        next;
    }

    if (/^&duplicate/) {
        # Ignore duplicates for now.
        next;
    }

    if (/^&([^ ]+)/) {
        # This is an unrecognized & line, discard the charset.
        print STDERR " &$1...";
        $discard_charset = 1;
        next;
    }

    # Save all other tokens into the double table.  `??' and `__' only
    # advance the code position; $codedim tracks the longest token seen.
    foreach $token (split) {
        if ($token ne "??" && $token ne "__") {
            $table[$code] = $token;
            if (length ($token) > $codedim) {
                $codedim = length ($token);
            }
        }
        $code++;
    }
}

# Process the last accumulated charset.
&complete_charset();
# Print the collectable initialization function: one declare_double_step
# call per table emitted by complete_charset, then one declare_alias call
# per queued alias.
print HDR "\n";
print HDR "void\n";
print HDR "module_charset (void)\n";
print HDR "{\n";

# Each entry is "NAME,CODEDIM".  The greedy match splits at the last comma,
# so only the name gets quoted.  The tables are numbered in the order they
# were registered.
$counter = 0;
while ($string = shift @declare_charset) {
    $string =~ s/(.*),/"$1", /;
    print HDR " declare_double_step (&table_$counter, $string);\n";
    $counter++;
}
print HDR "\n";

# Each entry is "ALIAS,CHARSET"; turn the separating comma into `", "' so
# that both names end up as separate C string arguments.
while ($string = shift @declare_alias) {
    $string =~ s/,/", "/;
    print HDR " declare_alias (\"$string\");\n";
}
print HDR "}\n";

# Buffered write errors only surface when the handle is closed.
close (HDR) || die "charset.pl: error writing charset.h: $!\n";
# Print the documentation: one Texinfo @item per retained charset, in
# sorted order, listing its aliases followed by its saved remarks.
open (TEXI, ">charset.texi")
    || die "charset.pl: cannot create charset.texi: $!\n";
foreach $charset (sort keys %body) {
    # Texinfo commands are kept in single-quoted strings.  Inside double
    # quotes, `@item' and `@code{' would be read as an array and a hash
    # slice instead of literal text.
    print TEXI "\n", '@item ', $charset, "\n";
    @list = sort (split (/,/, $list{$charset}));
    if (@list == 1) {
        print TEXI '@code{', $list[0], "} is an alias for this charset.\n";
    }
    elsif (@list > 0) {
        $string = '@code{' . join ('}, @code{', @list) . '}';
        # Replace the last comma of the enumeration with " and".
        $string =~ s/,([^,]+)$/ and$1/;
        print TEXI $string, " are aliases for this charset. \n";
    }
    print TEXI $body{$charset};
}

# Buffered write errors only surface when the handle is closed.
close (TEXI) || die "charset.pl: error writing charset.texi: $!\n";
exit 0;
# Routine for printing all accumulated information for the charset.
# If the charset should be discarded, adjust tables.
#
# Takes no arguments; it works on the globals filled in by the main loop:
# $charset, $header, $list, $body, @table, $codedim, $alias_count and
# $discard_charset.
#
# - When $discard_charset is set, the last $alias_count entries are removed
#   from @declare_alias and the flag is cleared.
# - Otherwise, when $header is non-empty, the remarks and alias list are
#   saved in %body and %list, the C comment and a `table_N' initializer of
#   8 rows of 32 code positions are written to HDR, "NAME,CODEDIM" is
#   queued in @declare_charset, and $charset_ordinal is advanced.
#
# In all cases $header is emptied and $alias_count is cleared on return, so
# a following discarded charset cannot remove aliases it never queued.
sub complete_charset {
    local ($row, $col, $line, $seen, $token);

    if ($discard_charset) {
        # Withdraw the aliases queued for the discarded charset.
        while ($alias_count-- > 0) {
            pop @declare_alias;
        }
        $discard_charset = 0;
        print STDERR " DISCARDED\n";
    }
    elsif ($header) {
        # Save the documentation.
        $list{$charset} = $list;
        $body{$charset} = $body;

        # Make introductory C comments.
        print HDR $header;
        print HDR "*/\n";

        # Make the table for this charset.
        print HDR "\n";
        print HDR "static DOUBLE_TABLE table_$charset_ordinal = \n";
        print HDR " {\n";
        for ($row = 0; $row < 8; $row++) {
            $line = "";
            $seen = 0;
            for ($col = 0; $col < 32; $col++) {
                $token = $table[$row * 32 + $col];
                if (defined ($token) && length ($token) > 0) {
                    # Test the length, not the truth value: the token "0"
                    # is false as a boolean but is a real entry.
                    $seen = 1;
                }
                else {
                    $token = "";
                }
                # Pad every entry to the width of the longest token.
                $token .= " " while length ($token) < $codedim;
                $line .= $token;
            }
            if ($seen) {
                # Escape backslashes and double quotes so the row is a
                # valid C string literal.
                $line =~ s/([\\"])/\\$1/g;
                print HDR " \"", $line, "\",\n";
            }
            else {
                # Nothing defined in these 32 positions.
                print HDR " NULL,\n";
            }
        }
        print HDR " };\n";

        # Register the table.
        push (@declare_charset, "$charset,$codedim");
        print STDERR " done\n";
        $charset_ordinal++;
    }
    $header = "";
    $alias_count = 0;
}