home *** CD-ROM | disk | FTP | other *** search
- /* -*-C-*-
- *******************************************************************************
- *
- * File: iso_convert.c
- * RCS: iso_convert.c,v 1.1 1997/12/17 16:45:03 tom Exp
- * Description: some iso header conversions.
- * Author: Tom Hageman <tom@basil.icce.rug.nl>
- * Created: Tue Dec 16 18:15:26 1997 (extracted/expanded from appnmail)
- * Modified:
- * Language: C
- * Package: mailapp-utilities
- * Status: Exp.
- *
- * (C) Copyright 1997, but otherwise this file is perfect freeware.
- *
- *******************************************************************************
- */
-
- #import <stdio.h>
- #import <string.h>
- #import <ctype.h>
- #import <regex.h>
- #import "optutil.h"
- #import "iso_convert.h"
-
- #import "iso2next.h"
- /* Tables in there are expected to be in the "recode --header" format. */
- /* XXX maybe we'd better use "recode --header --strict"? */
-
/*
 * Value of the hexadecimal digit `c' (either case), or -1 if `c' is not a
 * hexadecimal digit.
 */
static int qp_hex_value(unsigned char c)
{
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
    return -1;
}

/*
 * Decode `len' bytes of quoted-printable header text from `str' into `buf'.
 *
 *   "=XX"  (exactly two hexadecimal digits) -> the byte with value 0xXX
 *   "_"    -> a space  [rfc2047 4.2(2)]
 *   anything else is copied unchanged.
 *
 * A `=' that is not followed by two hexadecimal digits within the input is
 * counted as an error and copied through literally.
 *
 * The output is never longer than the input and is NOT NUL-terminated.
 * `buf' may point at or before `str' inside the same buffer: all input bytes
 * of a token are read before the corresponding output byte is stored.
 *
 * If `err' is non-NULL the number of errors found is added to `*err'.
 * Returns the number of bytes stored in `buf'.
 */
size_t decode_quoted_printable(char *buf, const char *str, size_t len, int *err)
{
    char *d = buf;
    const char *s = str;
    const char *e = str + len;
    int errors = 0;

    while (s < e)
    {
        if (*s == '=')
        {
            int hi = -1, lo = -1;

            /* Only look at the two digits if they lie inside the input. */
            if (e - s >= 3)
            {
                hi = qp_hex_value((unsigned char)s[1]);
                lo = qp_hex_value((unsigned char)s[2]);
            }
            if (hi >= 0 && lo >= 0)
            {
                *d++ = (char)((hi << 4) | lo);
                s += 3;
                continue;
            }
            /* Malformed escape: note it, then copy the `=' as-is below. */
            ++errors;
        }
        else if (*s == '_') /* special-case for Q-P headers [rfc2047 4.2(2)] */
        {
            *d++ = '\x20';
            s += 1;
            continue;
        }
        *d++ = *s++;
    }
    if (err) (*err) += errors;
    return (size_t)(d - buf);
}
-
/*
 * Value (0..63) of the base64 digit `c', or -1 if `c' is not part of the
 * base64 alphabet.
 */
static int base64_digit_value(unsigned char c)
{
    static const char alphabet[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    const char *p = memchr(alphabet, c, sizeof(alphabet) - 1);

    return p ? (int)(p - alphabet) : -1;
}

/*
 * Decode `len' bytes of base64 text from `str' into `buf'.
 *
 * Trailing `=' padding is ignored.  Every other character outside the base64
 * alphabet is counted as an error and skipped.  Complete groups of four
 * digits yield three bytes; a final group of three digits yields two bytes
 * and a final group of two digits yields one byte.  A single left-over digit
 * cannot encode a whole byte: it is counted as an error and dropped.
 *
 * The output is never longer than the input and is NOT NUL-terminated.
 * `buf' may point at or before `str' inside the same buffer: a group is read
 * completely before its bytes are stored.
 *
 * If `err' is non-NULL the number of errors found is added to `*err'.
 * Returns the number of bytes stored in `buf'.
 */
size_t decode_base64(char *buf, const char *str, size_t len, int *err)
{
    char *d = buf;
    const char *s = str;
    const char *e = str + len;
    unsigned char q[4] = {0, 0, 0, 0};  /* digits of the current group */
    int n = 0;                          /* number of digits held in q[] */
    int errors = 0;

    /* Ignore `=' padding at the end. */
    while (s < e && e[-1] == '=') --e;

    while (s < e)
    {
        int v = base64_digit_value((unsigned char)*s++);

        if (v < 0)
        {
            ++errors;
            continue;
        }
        q[n++] = (unsigned char)v;
        if (n == 4)
        {
            *d++ = (char)(((q[0] << 2) | (q[1] >> 4)) & 0xFF);
            *d++ = (char)(((q[1] << 4) | (q[2] >> 2)) & 0xFF);
            *d++ = (char)(((q[2] << 6) | q[3]) & 0xFF);
            n = 0;
        }
    }

    /* Flush the incomplete final group, if any. */
    switch (n)
    {
    case 1:
        /* 6 bits only: not enough for a byte. */
        ++errors;
        break;
    case 2:
        *d++ = (char)(((q[0] << 2) | (q[1] >> 4)) & 0xFF);
        break;
    case 3:
        *d++ = (char)(((q[0] << 2) | (q[1] >> 4)) & 0xFF);
        *d++ = (char)(((q[1] << 4) | (q[2] >> 2)) & 0xFF);
        break;
    default:
        break;
    }
    if (err) (*err) += errors;
    return (size_t)(d - buf);
}
-
/*
 * Translate `len' bytes from `str' into `buf' through the 256-entry lookup
 * table `table', which is indexed by the unsigned value of each input byte.
 *
 * A table entry of 0 means "no translation available": such bytes are
 * written as `?' (so the result never contains a NUL) and each one is
 * counted as an error.
 *
 * `buf' may be the same pointer as `str' (in-place translation); each byte
 * is read before its slot is overwritten.  The output is NOT NUL-terminated.
 *
 * If `err' is non-NULL the number of errors found is added to `*err'.
 * Returns the number of bytes stored, which always equals `len'.
 */
static size_t convert_table(char *buf, const char *str, size_t len, int *err, const unsigned char *table)
{
    size_t pos;
    int unmapped = 0;

    for (pos = 0; pos < len; pos++)
    {
        unsigned char mapped = table[(unsigned char)str[pos]];

        if (mapped == '\0')
        {
            /* Avoid null characters in string. */
            mapped = '?';
            unmapped += 1;
        }
        buf[pos] = (char)mapped;
    }

    if (err != NULL)
    {
        *err += unmapped;
    }
    return len;
}
-
-
- typedef size_t (*decode_function)(char *buf, const char *str, size_t len, int *err);
- typedef size_t (*convert_function)(char *buf, const char *str, size_t len, int *err, const unsigned char *table);
-
- int iso_convert(char *line)
- {
- static const struct
- {
- const char *name;
- const unsigned char *contents;
- convert_function convert;
- }
- c_table[] = {
- /* The contents of these tables are defined in "iso2next.h".
- If you add one there, don't forget to add it here too. */
- { "iso-8859-1", latin1_to_next, convert_table },
- { "iso-8859-2", latin2_to_next, convert_table },
- /* {{this could be expanded with a dedicated iso-2022-jp to EUC filter,
- if we were so inclined. However, this can also be solved externally,
- with the following rule near the top of your ~/.procmailrc:
-
- :0fh
- * =\?iso-2022-jp\?
- | nkf -e -m
-
- nkf -- Network Kanji code conversion Filter (v1.6 or better.)
- }} */
- };
- static const struct
- {
- char tag;
- decode_function decode;
- }
- d_table[] = {
- { 'Q', decode_quoted_printable },
- { 'B', decode_base64 },
- };
- static struct regex *isore = 0;
- int errors = 0;
- const char *name = "";
-
- if (!isore) isore = re_compile("=?\\([^?]*\\)?\\([A-Za-z]\\)?",0);
-
- while (re_match(line, isore) > 0)
- {
- const unsigned char *tt = NULL;
- convert_function convert = NULL;
- decode_function decode = NULL;
- int i, namelen;
- char coding = isore->braslist[1][0];
- char *s, *t, *p;
- size_t len;
-
- name = isore->braslist[0];
- namelen = (isore->braelist[0] - isore->braslist[0]);
- for (i = 0; i < sizeof(c_table)/sizeof(c_table[0]); i++)
- {
- if (strncasecmp(c_table[i].name, name, namelen) == 0 &&
- c_table[i].name[namelen] == 0)
- {
- convert = c_table[i].convert;
- name = c_table[i].name;
- tt = c_table[i].contents;
- break;
- }
- }
- for (i = 0; i < sizeof(d_table)/sizeof(d_table[0]); i++)
- {
- if (d_table[i].tag == toupper(coding))
- {
- decode = d_table[i].decode;
- break;
- }
- }
-
- s = (char *)isore->end;
-
- if ((t = strstr(s, "?=")) == NULL)
- {
- fprintf(stderr, "%s: warning: Missing terminating `?=' in MIME 8-bit header `%s'\n", progname(), line);
- len = strlen(s);
- }
- else
- {
- len = t - s;
- }
-
- /* Ignore whitespace between `encoded-words' [RFC2047 6.2] */
- for (p = line; p < isore->start && isspace(*p); p++) ;
-
- if (p < isore->start)
- {
- /* Just to be sure, assume iso-latin1 encoding outside match. */
- convert_table(line, line, (isore->start - line), &errors, latin1_to_next);
- line = (char *)isore->start;
- }
-
- if (tt == NULL)
- {
- fprintf(stderr, "%s: warning: MIME 8-bit header encoding `%.*s' is unsupported\n", progname(), namelen, name);
- len += s - line;
- }
- else if (decode == NULL)
- {
- fprintf(stderr, "%s: warning: MIME header transfer-encoding `%c' is unsupported\n", progname(), coding);
- len += s - line;
- }
- else
- {
- len = (*decode)(line, s, len, &errors);
- len = (*convert)(line, line, len, &errors, tt);
- strcpy(line + len, t ? t + 2 : "");
- }
- line = line + len;
- }
-
- /* Just to be sure, assume iso-latin1 encoding outside match. */
- convert_table(line, line, strlen(line), &errors, latin1_to_next);
-
- if (errors > 0)
- {
- fprintf(stderr, "%s: warning: MIME 8-bit header encoding `%s' incomplete conversion\n", progname(), name);
- return -1;
- }
- return 0;
- }
-