home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
PC-Online 1998 February
/
PCOnline_02_1998.iso
/
filesbbs
/
os2
/
pgp263.arj
/
PGP263I.SRC
/
PGP263II.ZIP
/
contrib
/
langtool
/
language.c
< prev
next >
Wrap
C/C++ Source or Header
|
1994-05-07
|
14KB
|
640 lines
/*
* language.c - Foreign language translation for PGP
* Finds foreign language "subtitles" for English phrases
* in external foriegn language text file.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "usuals.h"
#ifndef LANGTOOL
#include "fileio.h"
#include "language.h"
#include "pgp.h"
#else
#define MAX_PATH 255
boolean verbose;
long fsize();
#endif
char langfile[80] = "language.txt";
#define LANG_INDEXFILE "language.idx"
#define STRBUFSIZE 2048
char language[16] = "en"; /* The language code, defaults to English */
static char *strbuf;
static char lang[16]; /* readstr sets this to the language id of the msg it last read */
static int subtitles_available = 0;
static int line = 0;
static int errcount = 0;
/* subtitles_available is used to determine if we know whether the special
subtitles_file exists. subtitles_available has the following values:
0 = first time thru, we don't yet know if subtitles_file exists.
1 = we have already determined that subtitles_file exists.
-1 = we have already determined that subtitles_file does not exist.
*/
static void error(char *);
#define NEWLINE 0
#define COMMENT 1
#define INSTRING 2
#define ESCAPE 3
#define IDENT 4
#define DONE 5
#define ERROR 6
#define ERR1 7
/* Look for and return a quoted string from the file.
* If nlabort is true, return failure if we find a blank line
* before we find the opening quote.
*/
static char *
readstr (FILE *f, char *buf, int nlabort)
{
int c, d;
char *p = buf;
int state = NEWLINE;
int i = 0;
while ((c = getc(f)) != EOF) {
if (c == '\r')
continue;
/* line numbers are only incremented when creating index file */
if (line && c == '\n')
++line;
switch (state) {
case NEWLINE:
switch(c) {
case '#': state = COMMENT; break;
case '"': state = INSTRING; break;
case '\n':
if (nlabort) {
*buf = '\0';
return(buf);
}
default:
if (i == 0 && isalnum(c)) {
state = IDENT;
lang[i++] = c;
break;
}
if (!isspace(c)) {
error("syntax error\n");
state = ERROR;
}
}
break;
case COMMENT:
if (c == '\n')
state = NEWLINE;
break;
case INSTRING:
switch(c) {
case '\\': state = ESCAPE; break;
case '"': state = DONE; break;
default: *p++ = c;
}
break;
case ESCAPE:
switch (c) {
case 'n': *p++ = '\n'; break;
case 'r': *p++ = '\r'; break;
case 't': *p++ = '\t'; break;
case 'e': *p++ = '\033'; break;
case 'a': *p++ = '\007'; break;
case '#':
case '"':
case '\\': *p++ = c; break;
case '\n': break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
/* ANSI C rules: up to 3 octal digits */
d = c - '0';
if ((c = getc(f)) >= '0' && c <= '7') {
d = (d<<3) + (c-'0');
if ((c = getc(f)) >= '0' && c <= '7')
d = (d<<3) + (c-'0');
else
ungetc(c, f);
} else {
ungetc(c, f);
}
*p++ = d;
break;
default:
error("illegal escape sequence: ");
fprintf(stderr, "'\\%c'\n", c);
break;
}
state = INSTRING;
break;
case IDENT: /* language identifier */
if (c == ':') {
state = NEWLINE;
break;
}
if (c == '\n' && strncmp(lang, "No translation", 14) == 0)
{
i = 0;
state = NEWLINE;
break;
}
lang[i++] = c;
if (i == 15 || !isalnum(c) && !isspace(c)) {
lang[i] = '\0';
error("bad language identifier\n");
state = ERROR;
i = 0;
}
break;
case DONE:
if (c == '\n') {
lang[i] = '\0';
*p = '\0';
return(buf);
}
if (!isspace(c)) {
error("extra characters after '\"'\n");
state = ERROR;
}
break;
case ERROR:
if (c == '\n')
state = ERR1;
break;
case ERR1:
state = (c == '\n' ? NEWLINE : ERROR);
break;
}
}
if (state != NEWLINE)
error("unexpected EOF\n");
return(NULL);
}
static struct indx_ent {
word32 crc;
long offset;
} *indx_tbl = NULL;
static int max_msgs = 0;
static int nmsg = 0;
static FILE *langf;
static struct {
long lang_fsize; /* size of language.txt */
char lang[16]; /* language identifier */
int nmsg; /* number of messages */
} indx_hdr;
static int make_indexfile(char *);
word32 crcupdate(byte, word32);
void init_crc();
/*
* uses 24-bit CRC function from armor.c
*/
static word32
message_crc(char *s)
{
word32 crc = 0;
while (*s)
crc = crcupdate(*s++, crc);
return(crc);
}
/*
* lookup file offset in indx_tbl
*/
static long
lookup_offset(word32 crc)
{
int i;
for (i = 0; i < nmsg; ++i)
if (indx_tbl[i].crc == crc)
return(indx_tbl[i].offset);
return(-1);
}
#ifndef LANGTOOL
static void init_lang();
/*
* return foreign translation of s
*/
char *
PSTR (char *s)
{
long filepos;
if (subtitles_available == 0)
init_lang();
if (subtitles_available < 0)
return(s);
filepos = lookup_offset(message_crc(s));
if (filepos == -1) {
return(s);
} else {
fseek(langf, filepos, SEEK_SET);
readstr(langf, strbuf, 1);
}
if (strbuf[0] == '\0')
return(s);
for (s = strbuf; *s; ++s)
*s = EXT_C(*s);
return(strbuf);
}
/*
* initialize the index table: read it from language.idx or create
* a new one and write it to the index file. A new index file is
* created if the language set in config.pgp doesn't match the one
* in language.idx or if the size of language.txt has changed.
*/
static void
init_lang()
{
char indexfile[MAX_PATH];
char subtitles_file[MAX_PATH];
FILE *indexf;
if (strcmp(language, "en") == 0) {
subtitles_available = -1;
return; /* use default messages */
}
buildfilename (subtitles_file, langfile);
if ((langf = fopen(subtitles_file, "rb")) == NULL) {
subtitles_available = -1;
return;
}
init_crc();
if ((strbuf = (char *) malloc(STRBUFSIZE)) == NULL) {
fprintf(stderr, "Not enough memory for foreign subtitles\n");
fclose(langf);
subtitles_available = -1;
return;
}
buildfilename(indexfile, LANG_INDEXFILE);
if ((indexf = fopen(indexfile, "rb")) != NULL) {
if (fread(&indx_hdr, sizeof(indx_hdr), 1, indexf) == 1 &&
indx_hdr.lang_fsize == fsize(langf) &&
strcmp(indx_hdr.lang, language) == 0)
{
nmsg = indx_hdr.nmsg;
indx_tbl = (struct indx_ent *) malloc(nmsg * sizeof(struct indx_ent));
if (indx_tbl == NULL) {
fprintf(stderr, "Not enough memory for foreign subtitles\n");
fclose(indexf);
fclose(langf);
subtitles_available = -1;
return;
}
if (fread(indx_tbl, sizeof(struct indx_ent), nmsg, indexf) != nmsg)
{
free(indx_tbl); /* create a new one */
indx_tbl = NULL;
}
}
fclose(indexf);
}
if (indx_tbl == NULL && make_indexfile(indexfile) < 0) {
fclose(langf);
subtitles_available = -1;
} else {
subtitles_available = 1;
}
}
#endif /* !LANGTOOL */
/*
* build the index table in memory, and if indexfile is not NULL,
* write it to this file
*/
static int
make_indexfile(char *indexfile)
{
FILE *indexf;
long filepos;
int total_msgs = 0;
char *res;
rewind(langf);
indx_hdr.lang_fsize = fsize(langf);
strncpy(indx_hdr.lang, language, 15);
init_crc();
line = 1;
nmsg = 0;
while (readstr(langf, strbuf, 0)) {
if (nmsg == max_msgs) {
if (max_msgs) {
max_msgs *= 2;
indx_tbl = (struct indx_ent *) realloc(indx_tbl, max_msgs *
sizeof(struct indx_ent));
} else {
max_msgs = 400;
indx_tbl = (struct indx_ent *) malloc(max_msgs *
sizeof(struct indx_ent));
}
if (indx_tbl == NULL) {
fprintf(stderr, "Not enough memory for foreign subtitles\n");
return(-1);
}
}
++total_msgs;
indx_tbl[nmsg].crc = message_crc(strbuf);
if (lookup_offset(indx_tbl[nmsg].crc) != -1)
error("message CRC not unique.\n");
do {
filepos = ftell(langf);
res = readstr (langf, strbuf, 1); /* Abort if find newline first */
if (*language == '\0') /* use first language found */
strcpy(language, lang);
} while (res && strbuf[0] != '\0' && strcmp(language, lang) != 0);
if (res == NULL)
break;
if (strbuf[0] == '\0') /* No translation */
continue;
indx_tbl[nmsg].offset = filepos;
++nmsg;
do
res = readstr (langf, strbuf, 1); /* Abort if find newline first */
while (res && strbuf[0] != '\0');
}
line = 0;
indx_hdr.nmsg = nmsg;
if (verbose)
fprintf(stderr, "%s: %d messages, %d translations for language \"%s\"\n",
langfile, total_msgs, nmsg, language);
if (nmsg == 0) {
fprintf(stderr, "No translations available for language \"%s\"\n\n",
language);
return(-1);
}
if (indexfile) {
if ((indexf = fopen(indexfile, "wb")) == NULL)
fprintf(stderr, "Cannot create %s\n", indexfile);
else {
fwrite(&indx_hdr, 1, sizeof(indx_hdr), indexf);
fwrite(indx_tbl, sizeof(struct indx_ent), nmsg, indexf);
if (ferror(indexf) || fclose(indexf))
fprintf(stderr, "error writing %s\n", indexfile);
}
}
return(0);
}
static void
error(char *s)
{
++errcount;
if (langfile[0])
fprintf(stderr, "%s:", langfile);
if (line)
fprintf(stderr, "%d:", line);
fprintf(stderr, " %s", s);
}
#ifdef LANGTOOL
/*
* language string tool for manipulating language files
* link with CRC routines from armor.c
*/
#define CMD_EXTRACT 1
#define CMD_CHECK 2
#define CMD_MERGE 3
extern char *optarg;
extern int optind;
main(int argc, char **argv)
{
int opt, cmd = 0, rc = 0;
char *langIDs[16];
char *outfile = NULL;
init_crc();
if ((strbuf = (char *) malloc(STRBUFSIZE)) == NULL) {
perror(argv[0]);
exit(1);
}
while ((opt = getopt(argc, argv, "cxmo:")) != EOF) {
switch (opt) {
case 'c': cmd = CMD_CHECK; break;
case 'x': cmd = CMD_EXTRACT; break;
case 'm': cmd = CMD_MERGE; break;
case 'o': outfile = optarg; break;
default: usage();
}
}
argc -= optind; argv += optind;
switch (cmd) {
case CMD_EXTRACT:
if (argc < 2)
usage();
rc = extract(argv[0], outfile, &argv[1]);
break;
case CMD_MERGE:
if (argc < 2)
usage();
rc = merge(argv[0], argv[1], outfile, argv[2]);
break;
case CMD_CHECK:
verbose = 1;
if (argc == 0)
checkfile("language.txt");
else
while (--argc >= 0)
checkfile(*argv++);
break;
default: usage();
}
exit(rc);
}
usage()
{
fprintf(stderr, "usage: langtool -[x|c|m] [-o outputfile] ...\n\n\
To extract one or more languages from a merged file:\n\
langtool -x [-o outputfile] file langID...\n\n\
To check a language file for syntax errors:\n\
langtool -c file...\n\n\
To merge language \"lang\" from lang_file with source_file:\n\
langtool -m [-o outputfile] source_file lang_file [lang]\n");
exit(1);
}
merge(char *base_file, char *lang_file, char *outfile, char *langID)
{
FILE *fp, *outf;
long fpos = 0, filepos;
int newmsgs = 0;
if ((langf = fopen(lang_file, "r")) == NULL) {
perror(lang_file);
return -1;
}
strcpy(langfile, lang_file);
if (langID)
strcpy(language, langID);
else
language[0] = '\0'; /* use first language found */
errcount = 0;
make_indexfile(NULL);
if (errcount)
return -1;
langfile[0] = '\0'; /* don't print filename in error msgs */
if ((fp = fopen(base_file, "r")) == NULL) {
perror(base_file);
return -1;
}
if (outfile == NULL)
outf = stdout;
else {
if ((outf = fopen(outfile, "w")) == NULL) {
perror(outfile);
return(-1);
}
}
while (readstr(fp, strbuf, 0)) {
copypos(fp, outf, fpos);
fpos = ftell(fp);
filepos = lookup_offset(message_crc(strbuf));
if (filepos == -1) {
fprintf(outf, "No translation\n");
++newmsgs;
} else {
fseek(langf, filepos, SEEK_SET);
readstr(langf, strbuf, 1);
copypos(langf, outf, filepos);
}
while (readstr(fp, strbuf, 1))
if (*strbuf == '\0')
break;
}
copypos(fp, outf, fpos);
fflush(outf);
if (ferror(outf)) {
perror(outfile);
return -1;
}
if (newmsgs)
fprintf(stderr, "%d untranslated messages\n", newmsgs);
return errcount;
}
extract(char *infile, char *outfile, char **langIDs)
{
FILE *fp, *outf;
long fpos = 0;
char **langID;
if ((fp = fopen(infile, "r")) == NULL) {
perror(infile);
return -1;
}
if (outfile == NULL) {
outf = stdout;
} else {
if ((outf = fopen(outfile, "w")) == NULL) {
perror(outfile);
fclose(fp);
return(-1);
}
}
while (readstr(fp, strbuf, 0)) {
copypos(fp, outf, fpos);
fpos = ftell(fp);
while (readstr(fp, strbuf, 1)) {
if (*strbuf == '\0')
break;
for (langID = langIDs; *langID; ++langID) {
if (strcmp(lang, *langID) == 0)
copypos(fp, outf, fpos);
}
fpos = ftell(fp);
}
}
copypos(fp, outf, fpos);
fflush(outf);
if (ferror(outf)) {
perror(outfile);
return -1;
}
return 0;
}
checkfile(char *name)
{
if ((langf = fopen(name, "rb")) == NULL) {
perror(name);
return -1;
}
strcpy(langfile, name);
language[0] = '\0'; /* count messages for first language */
errcount = 0;
make_indexfile(NULL);
fclose(langf);
return errcount;
}
copypos(FILE *f, FILE *g, long pos)
{
long size;
size = ftell(f) - pos;
fseek(f, pos, SEEK_SET);
copyfile(f, g, size);
}
copyfile(FILE *f, FILE *g, long n)
{
int c;
while (--n >= 0 && (c = getc(f)) != EOF)
putc(c, g);
}
long
fsize(FILE *f)
{
long len, pos;
pos = ftell(f);
fseek(f, 0L, SEEK_END);
len = ftell(f);
fseek(f, pos, SEEK_SET);
return len;
}
#endif /* LANGTOOL */