/* Archive listing: Mega Top 1 / os2_top1.zip / os2_top1 / APPS / TEKST /
   GRECODE / CHARSET.C -- C/C++ Source or Header, 1993-12-22, 14KB,
   561 lines.  */
/* Conversion of files between different charsets and usages.
Copyright (C) 1990, 1993 Free Software Foundation, Inc.
Francois Pinard <pinard@iro.umontreal.ca>, 1993.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include "recode.h"
/* Maximum number of charset values.  This is the capacity of
   charset_array; find_charset reports a fatal error once it is full. */
#define MAX_CHARSETS 200
/* Hash table size for charset names: the number of primary slots in
   hash_table, and the modulus applied by hash_string. */
#define HASH_TABLE_SIZE 997
/* Maximum number of characters per 10646 symbol.  It sizes the static
   buffer in which code_to_symbol builds its result. */
#define MAX_SYMBOL_SIZE 9
/* Known character sets.  A struct hash is one entry of the name lookup
   table: it ties a cleaned up charset or alias name to its charset.
   Entries landing on the same table index are chained through NEXT. */
struct hash
{
const char *name; /* cleaned up charset or alias name, or NULL if the slot is unused */
CHARSET *charset; /* associated charset */
struct hash *next; /* next entry chained on the same table index, or NULL */
};
/* Name lookup table, indexed by the hash_string value of a cleaned up
   name.  Overflow entries are allocated and chained from each slot. */
struct hash hash_table[HASH_TABLE_SIZE];
/* Storage for the charsets themselves, handed out in order by
   find_charset. */
CHARSET charset_array[MAX_CHARSETS];
/* Number of entries of charset_array currently in use. */
int number_of_charsets;
/* Array of strings ready for argmatch: all names found in hash_table,
   followed by a NULL sentinel.  It is built by make_argmatch_array. */
static const char **argmatch_array;
#include "charset.h"
/*--------------------------------------.
| Prepare charsets for initialization. |
`--------------------------------------*/
void
prepare_charset_initialization (void)
{
int counter;
for (counter = 0; counter < HASH_TABLE_SIZE; counter++)
{
hash_table[counter].name = NULL;
hash_table[counter].next = NULL;
}
number_of_charsets = 0;
}
/*-----------------------------------------------------------------------.
| Return a newly allocated copy of NAME, keeping only ASCII letters and  |
| digits, with upper case letters folded to lower case.  The caller is   |
| responsible for freeing the result.                                    |
`-----------------------------------------------------------------------*/

static char *
cleanup_charset_name (const char *name)
{
  /* The copy can only shrink, so the length of NAME is always enough.  */
  char *cleaned = xmalloc (strlen (name) + 1);
  char *tail = cleaned;
  const unsigned char *cursor;

  for (cursor = (const unsigned char *) name; *cursor != '\0'; cursor++)
    {
      int byte = *cursor;

      if (byte >= 'A' && byte <= 'Z')
        *tail++ = byte - 'A' + 'a';
      else if ((byte >= 'a' && byte <= 'z') || (byte >= '0' && byte <= '9'))
        *tail++ = byte;
      /* Anything else is silently dropped.  */
    }
  *tail = '\0';

  return cleaned;
}
/*-----------------------------------------------------------------------.
| Return the hash table index for STRING, always within the range 0 to   |
| HASH_TABLE_SIZE - 1.                                                   |
`-----------------------------------------------------------------------*/

/* A plain multiplicative hash, reduced at each step so the running value
   stays small.  It is naive, but sufficient for our purpose.  */

static int
hash_string (const char *string)
{
  const unsigned char *cursor = (const unsigned char *) string;
  int index = 0;

  while (*cursor != '\0')
    {
      index = (index * 31 + (int) *cursor) % HASH_TABLE_SIZE;
      cursor++;
    }
  return index;
}
/*--------------------------------------------------------------------------.
| Return the charset from its NAME or alias name. If it does not already |
| exist, add a new charset entry and initialize it with a brand new value. |
`--------------------------------------------------------------------------*/
CHARSET *
find_charset (const char *name)
{
char *hashname;
struct hash *hash;
CHARSET *charset;
/* Search the whole hash bucket and return any match. */
hashname = cleanup_charset_name (name);
for (hash = hash_table + hash_string (hashname);
hash->name;
hash = hash->next)
{
if (strcmp (hashname, hash->name) == 0)
{
free (hashname);
return hash->charset;
}
if (!hash->next)
break;
}
/* A new charset has to be created. */
if (number_of_charsets == MAX_CHARSETS)
error (EXIT_FAILURE, 0, "MAX_CHARSETS is too small");
charset = charset_array + number_of_charsets++;
/* If the current slot is already used, create an overflow entry and
initialize it enough so it could be taken for the current slot. */
if (hash->name)
{
hash->next = (struct hash *) xmalloc (sizeof (struct hash));
hash = hash->next;
hash->next = NULL;
}
/* Initialize the current slot with the new charset. */
hash->name = hashname;
hash->charset = charset;
charset->name = name;
charset->ignore = 0;
charset->table = NULL;
return charset;
}
/*-------------------------------------------------------------------------.
| Have NAME as an alternate charset name for OLD_NAME. Create OLD_NAME if |
| it does not exist already. |
`-------------------------------------------------------------------------*/
void
declare_alias (const char *name, const char *old_name)
{
char *hashname;
struct hash *hash;
CHARSET *old_charset;
/* Find the old value. */
old_charset = find_charset (old_name);
/* Search the whole hash bucket. */
hashname = cleanup_charset_name (name);
for (hash = hash_table + hash_string (hashname);
hash->name;
hash = hash->next)
{
if (strcmp (hashname, hash->name) == 0)
{
if (hash->charset != old_charset)
error (EXIT_FAILURE, 0, "Charset %s already exists and is not %s",
name, old_name);
free (hashname);
return;
}
if (!hash->next)
break;
}
/* If the current slot is already used, create an overflow entry and
initialize it enough so it could be taken for the current slot. */
if (hash->name)
{
hash->next = (struct hash *) xmalloc (sizeof (struct hash));
hash = hash->next;
hash->next = NULL;
}
/* Initialize the current slot with the new charset. */
hash->name = hashname;
hash->charset = old_charset;
}
/*-----------------------------------------------------------------------.
| Collect all known charset names and aliases into the NULL terminated   |
| array of strings that argmatch expects.                                |
`-----------------------------------------------------------------------*/

void
make_argmatch_array (void)
{
  const char **fill;            /* next free position in the array */
  struct hash *entry;           /* cursor in a chain */
  int bucket;                   /* index in the hash table */
  int total = 0;                /* number of names found */

  /* First pass: count the names.  */
  for (bucket = 0; bucket < HASH_TABLE_SIZE; bucket++)
    {
      entry = &hash_table[bucket];
      while (entry != NULL && entry->name != NULL)
        {
          total++;
          entry = entry->next;
        }
    }

  /* One more position is needed for the NULL sentinel.  */
  argmatch_array
    = (const char **) xmalloc ((total + 1) * sizeof (const char *));

  /* Second pass: store the names.  The strings are shared with the hash
     table, they are not copied.  */
  fill = argmatch_array;
  for (bucket = 0; bucket < HASH_TABLE_SIZE; bucket++)
    {
      entry = &hash_table[bucket];
      while (entry != NULL && entry->name != NULL)
        {
          *fill++ = entry->name;
          entry = entry->next;
        }
    }
  *fill = NULL;
}
/*-----------------------------------------------------------------------.
| Resolve NAME, possibly abbreviated, to the cleaned up name of a known  |
| charset or alias.  A NULL or empty NAME means DEFAULT_CHARSET, if this |
| is defined.  Ambiguous or unknown names are diagnosed through usage.   |
`-----------------------------------------------------------------------*/

const char *
clean_charset_name (const char *name)
{
  char *key;                    /* cleaned up copy of NAME */
  int match;                    /* argmatch result */

  /* Let usage know that a charset is being decoded.  */
  decoding_charset_flag = 1;

  /* Never hand a NULL pointer to the lookup below.  */
#ifdef DEFAULT_CHARSET
  if (name == NULL || *name == '\0')
    name = DEFAULT_CHARSET;
#else
  if (name == NULL)
    name = "";
#endif

  /* Match the cleaned up name against all known names.  */
  key = cleanup_charset_name (name);
  match = argmatch (key, argmatch_array);
  free (key);

  /* Negative results are match errors.  There is no break between the
     cases: usage (EXIT_FAILURE) presumably does not return -- TODO
     confirm.  */
  switch (match)
    {
    case -2:
      error (0, 0, "Ambiguous charset `%s'", name);
      usage (EXIT_FAILURE);

    case -1:
      error (0, 0, "Unknown charset `%s'", name);
      usage (EXIT_FAILURE);
    }

  /* Decoding is over, usage may behave normally again.  */
  decoding_charset_flag = 0;

  return argmatch_array[match];
}
/*-----------------------------------------------------------------------.
| qsort comparison for struct hash entries: order by true charset name   |
| first, then by the name or alias held in the entry itself.             |
`-----------------------------------------------------------------------*/

static int
compare_struct_hash (const void *void_first, const void *void_second)
{
  const struct hash *first = (const struct hash *) void_first;
  const struct hash *second = (const struct hash *) void_second;
  int order = strcmp (first->charset->name, second->charset->name);

  return order != 0 ? order : strcmp (first->name, second->name);
}
/*-----------------------------------------------------------------------.
| Print all known charsets on standard output, one line per charset: the |
| true name first, then every cleaned up name and alias it answers to.   |
`-----------------------------------------------------------------------*/

void
list_all_charsets (void)
{
  struct hash *sorted;          /* work copy of all table entries */
  struct hash *fill;            /* next free position in the work copy */
  struct hash *entry;           /* cursor in a chain or in the work copy */
  int bucket;                   /* index in the hash table */
  int total = 0;                /* number of table entries */

  /* First pass: count the entries.  */
  for (bucket = 0; bucket < HASH_TABLE_SIZE; bucket++)
    {
      entry = &hash_table[bucket];
      while (entry != NULL && entry->name != NULL)
        {
          total++;
          entry = entry->next;
        }
    }

  /* Second pass: copy the entries into a flat array.  */
  sorted = (struct hash *) xmalloc (total * sizeof (struct hash));
  fill = sorted;
  for (bucket = 0; bucket < HASH_TABLE_SIZE; bucket++)
    {
      entry = &hash_table[bucket];
      while (entry != NULL && entry->name != NULL)
        {
          *fill++ = *entry;
          entry = entry->next;
        }
    }

  /* Sorting brings together all the names of a same charset.  */
  qsort (sorted, total, sizeof (struct hash), compare_struct_hash);

  for (entry = sorted; entry < sorted + total; entry++)
    {
      /* A change of true charset name starts a new output line.  */
      if (entry == sorted)
        printf ("%s", entry->charset->name);
      else if (entry->charset->name != entry[-1].charset->name)
        {
          printf ("\n");
          printf ("%s", entry->charset->name);
        }

      /* Then comes the name or alias, in its cleaned up form.  */
      printf (" %s", entry->name);
    }
  printf ("\n");

  free (sorted);
}
/*-----------------------------------------------------------------------.
| Return the 10646 symbol CHARSET gives to CODE, spaces removed, or NULL |
| when CODE has no symbol.  The result lives in a static buffer which    |
| the next call overwrites.                                              |
`-----------------------------------------------------------------------*/

static char *
code_to_symbol (CHARSET *charset, int code)
{
  static char symbol[MAX_SYMBOL_SIZE + 1];
  const char *source;
  char *target = symbol;
  int remaining;

  /* The double table holds one string per group of 32 codes; a missing
     string means that none of these codes is defined.  */
  source = (*charset->table)[code / 32];
  if (source == NULL)
    return NULL;

  /* Within the string, each code owns a field of charset->size
     characters.  A field starting with a space is an undefined code.  */
  source += charset->size * (code % 32);
  if (*source == ' ')
    return NULL;

  /* Copy the field, leaving out any space.  */
  for (remaining = charset->size; remaining > 0; remaining--, source++)
    if (*source != ' ')
      *target++ = *source;
  *target = '\0';

  return symbol;
}
/*-----------------------------------------------------------------------.
| Print the 10646 symbol CHARSET gives to CODE, then pad with spaces so  |
| that exactly charset->size columns are written.                        |
`-----------------------------------------------------------------------*/

static void
print_symbol (CHARSET *charset, int code)
{
  const char *text = code_to_symbol (charset, code);
  int printed = 0;

  /* An undefined code prints as an all blank field.  */
  if (text != NULL)
    while (*text != '\0' && printed < charset->size)
      {
        putchar (*text);
        text++;
        printed++;
      }

  while (printed < charset->size)
    {
      putchar (' ');
      printed++;
    }
}
/*-----------------------------------------------------------------------.
| Print the long description of CHARSET on standard output: one line per |
| defined code, giving its decimal, octal and hexadecimal values, its    |
| 10646 symbol and, when known, its character name.                      |
`-----------------------------------------------------------------------*/

void
list_full_charset (CHARSET *charset)
{
  int insert_white;             /* insert a white line before printing */
  int code;                     /* code counter */
  const char *symbol;           /* symbol for code */
  const char *charname;         /* charname for code */

  /* Ensure we have a double table to play with.  */
  if (!charset->table)
    error (EXIT_FAILURE, 0,
           "Cannot list `%s', no 10646 names available for this charset",
           charset->name);

  /* Print the long table.  A white line precedes the first defined code,
     and also separates runs of defined codes from one another.  */
  printf ("dec oct hex ch %s\n", charset->name);

  insert_white = 1;
  for (code = 0; code < 256; code++)
    {
      symbol = code_to_symbol (charset, code);
      if (!symbol)
        {
          insert_white = 1;
          continue;
        }

      if (insert_white)
        {
          printf ("\n");
          insert_white = 0;
        }

      /* The precision alone asks for leading zeroes.  The `0' flag is
         ignored whenever a precision is given for an integer conversion,
         so it is not written here.  */
      printf ("%3d %.3o %.2x ", code, code, code);
      print_symbol (charset, code);

      charname = symbol_to_charname (symbol);
      if (charname)
        printf (" %s", charname);
      printf ("\n");
    }
}
/*-----------------------------------------------------------------------.
| Print a concise, tabular description of CHARSET on standard output,    |
| numbering the codes according to the global list_format.               |
`-----------------------------------------------------------------------*/

void
list_concise_charset (CHARSET *charset)
{
  DOUBLE_TABLE *table;          /* double table */
  const char *format;           /* format string for code numbers */
  int half;                     /* half 0, half 1 of the table */
  int counter;                  /* table row, then first code of a line */
  int counter2;                 /* offset of a column within a line */
  int code;                     /* code value */

  /* Ensure we have a double table to play with.  */
  if (!charset->table)
    error (EXIT_FAILURE, 0,
           "Cannot list `%s', no 10646 names available for this charset",
           charset->name);
  table = charset->table;

  printf ("%s\n", charset->name);

  /* Select format for numbers.  The precision alone asks for leading
     zeroes; the `0' flag is ignored whenever a precision is given for an
     integer conversion, so it is not written here.  Any unexpected value
     of list_format falls back to decimal, so that FORMAT is never used
     uninitialized.  */
  switch (list_format)
    {
    case FULL_FORMAT:
      return;                   /* cannot happen */

    case OCTAL_FORMAT:
      format = "%.3o";
      break;

    case HEXADECIMAL_FORMAT:
      format = "%.2x";
      break;

    case NO_FORMAT:
    case DECIMAL_FORMAT:
    default:
      format = "%3d";
      break;
    }

  /* Print both halves of the table.  */
  for (half = 0; half < 2; half++)
    {
      /* Skip printing this half if it is empty, that is, if none of its
         four double table entries is present.  */
      for (counter = 4 * half; counter < 4 * half + 4; counter++)
        if ((*table)[counter])
          break;
      if (counter == 4 * half + 4)
        continue;

      /* Print this half as 16 lines of 8 codes each.  Codes on a same
         line are 16 apart, so codes run down the columns.  */
      printf ("\n");
      for (counter = 128 * half; counter < 128 * half + 16; counter++)
        for (counter2 = 0; counter2 < 128; counter2 += 16)
          {
            if (counter2 > 0)
              printf (" ");

            code = counter + counter2;
            printf (format, code);
            printf (" ");
            print_symbol (charset, code);

            /* The eighth column ends the line.  */
            if (counter2 == 112)
              printf ("\n");
          }
    }
}