home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
DP Tool Club 31
/
CDASC_31_1996_juillet_aout.iso
/
internet
/
rnr214.zip
/
RNRCHAR.PAS
< prev
next >
Wrap
Pascal/Delphi Source File
|
1996-01-30
|
15KB
|
550 lines
unit rnrchar;
{
Character set support for rnr - handles conversion between
IBM PC codepages and character sets used on USENET and mail.
The implementation follows the MIME standard, defined in RFC 1521.
However, it does not implement all of the required transfer-encodings
and document types; thus it cannot claim to be MIME compatible.
In order for the conversion to be put into effect, the option
'--local-charset filename' must be used.
By Henrik Storner (storner@osiris.ping.dk)
modified by Russell_Schulz@locutus.ofB.ORG
}
{$i rnr-def.pas}
interface
uses rnrglob, genericf, rnrio, rnrproc, rnrfile;
{ These need to be global, as they are written in headers }
var
postingsetname: string; { charset used for outgoing posts }
mailingsetname: string; { charset used for outgoing mail }
mailxfername : string; { transfer-encoding for mail }
procedure linetolocal(var s: string);
procedure localtoline(var s: string);
procedure setreadencoding(chsetheader: string; encheader: string);
procedure setsendencoding(chsetheader: string; encheader: string);
procedure loadcharsets(fn: string);
implementation
type
charset_ptr = ^charset_rec;
charset_rec = record
name: string;
cnvtable : string;
next : charset_ptr;
end;
{ RFC 1521 transfer encodings }
xferenc = (
xfer_7bit, {* 7 bit - no translation needed *}
xfer_8bit, {* 8 bit - no translation needed *}
xfer_binary, {* binary - no translation needed *}
xfer_quoted, {* Quoted printable *}
xfer_base64, {* Base64 *}
xfer_user {* Userdefined - don't touch! *}
);
var
{ These are only used locally }
charsets : charset_ptr;
{ For decoding news and mail; default translation if no MIME header }
defaultset : charset_ptr;
{ For decoding news and mail; set up for each article }
decodingset : string; { Translation table for line->local }
decodingxfer : xferenc; { Current transfer encoding }
{ For encoding news and mail; set up after message is edited }
{ NOTE: This has to be setup after message is edited, as user may }
{ choose to alter the charset/transfer-encoding headers. }
encodingset : string; { Translation table for local->line }
encodingxfer : xferenc; { Transfer-encoding }
{ Characters passed uncoded when using quoted-printable }
mailsafechars : set of char;
function hexdigitval(c: char): integer;
begin
case c of
'0'..'9': hexdigitval:= ord(c) - ord('0');
'A'..'F': hexdigitval:= ord(c) - ord('A') + 10;
'a'..'f': hexdigitval:= ord(c) - ord('a') + 10;
else hexdigitval:= -1;
end;
end;
function hexstring(v: byte): string;
const
hx : array[0..15] of char = '0123456789ABCDEF';
begin
hexstring:= hx[v div 16] + hx[v mod 16];
end;
procedure transferencode(var s:string; encoding: xferenc);
{ Convert a line of data to the selected transfer-encoding }
var
r: string;
i: integer;
lastbreak: integer;
begin
case (encoding) of
xfer_7bit,
xfer_8bit,
xfer_binary,
xfer_user:
{ Don't change it }
;
xfer_base64:
{ NOT SUPPORTED }
;
xfer_quoted:
begin
r := '';
lastbreak:= 0; { Indicates last position for soft line-break }
for i:= 1 to length(s) do
if ( (s[i] in mailsafechars) or
((s[i] in [tab, ' ']) and (i < length(s))) ) then
begin
{ These are not encoded: Plain ascii except '=', plus }
{ SPACE and TAB that are not last on the line. }
if ((length(r) - lastbreak) > 73) then
begin
{ Need to insert soft linebreak }
r := r + '=' + lf;
lastbreak:= length(r);
end;
r := r + s[i];
end
else
begin
{ These get encoded }
if ((length(r) - lastbreak) > 71) then
begin
r := r + '=' + lf;
lastbreak:= length(r);
end;
r := r + '=' + hexstring(ord(s[i]));
end;
s := r;
end; { xfer_quoted }
end;
end;
procedure transferdecode(var s:string; encoding: xferenc);
{ Convert a line of data from the transferencoding form to }
{ canonical form. This must be done BEFORE applying the }
{ conversions necessary for the charset used. }
var
r: string;
i: integer;
hexval: integer;
begin
case (encoding) of
xfer_7bit, { 7-bit is OK. }
xfer_8bit, { 8-bit is OK. }
xfer_binary, { Binary may not work, because of linelength limitations! }
xfer_user: { No guarantee that this will work. }
{ Don't do a thing here }
;
xfer_base64:
if s<>'' then
s:= '<base64 decoding not supported>';
xfer_quoted:
{ Quoted-printable encoding. }
{ This encoding passes most ASCII characters unaltered. Exception is }
{ control characters and 8-bit characters, and the '=', which are encoded }
{ as <char> := "="hexval So decode by scanning for '=' and rebuilding }
{ original This encoding may add linebreaks in the middle of an input }
{ line, or may specify that no linebreak should occur at the end of an }
{ inputline. We are unable to obey neither of these. }
begin
r := '';
i := 1;
while (i <= length(s)) do
begin
if (s[i] = '=') then
begin
inc(i);
if (i < length(s)) then
begin
{ Expect 2-digit hex code here. }
{ i<length(s) ==> we do have two characters }
hexval:= 16*hexdigitval(s[i]) + hexdigitval(s[i+1]);
if (hexval >= 0) then
r := r + chr(hexval)
else
{ Coding error! Not valid hex digit. }
{ Pass thru unaltered. Warn user ?? }
r := r + copy(s, i-1, 3);
inc(i,2);
end
else { i>=length(s) }
if (i > length(s)) then
begin
{ '=' as last character ==> soft line break }
{ Cannot handle this. }
end
else
begin
{ Coding error - only one hex digit. }
{ Pass thru unaltered. Warn user ?? }
r := r + '=' + s[i];
inc(i);
end;
end
else { not `=' }
begin
{ Ordinary, unquoted character. Could check that }
{ it is a legal character (ASCII), but why bother }
r := r + s[i];
inc(i);
end;
end;
s := r;
end; { quoted printable }
end; { case }
end;
procedure detectcharset(var chsetheader: string; encheader: string;
var xlate: string; var xfer: xferenc;
defset: charset_ptr);
var
p: charset_ptr;
found: boolean;
begin
{ Process the character set header (if any) and set up translation }
{ tables for the linetolocal routine. Takes the contents of the }
{ 'Content-type' and 'Content-transfer-encoding' header as parameter. }
{ First, detect the character set defined by the content-type }
p := charsets;
found:= false;
chsetheader := lower(chsetheader);
if (chsetheader <> '') then
while ((p <> nil) and (not found)) do
begin
found := (pos(p^.name, chsetheader) <> 0);
if (not found) then
p:= p^.next;
end;
if found then
xlate:= p^.cnvtable
else
begin
if (defset = nil) then
xlate := ''
else
xlate:= defset^.cnvtable;
end;
{ Next, determine the transfer-encoding }
encheader := lower(encheader);
if (pos('7bit', encheader) <> 0) then
xfer := xfer_7bit
else if (pos('8bit', encheader) <> 0) then
xfer := xfer_8bit
else if (pos('quoted-printable', encheader) <> 0) then
xfer := xfer_quoted
else if (pos('base64', encheader) <> 0) then
xfer := xfer_base64
else if (pos('binary', encheader) <> 0) then
xfer := xfer_binary
else if (encheader <> '') then
xfer := xfer_user
else
xfer := xfer_7bit; { Default if no transfer-encoding specified }
end;
procedure setreadencoding(chsetheader: string; encheader: string);
begin
detectcharset(chsetheader, encheader, decodingset, decodingxfer, defaultset);
end;
procedure setsendencoding(chsetheader: string; encheader: string);
var
i: integer;
reverseset: string;
begin
detectcharset(chsetheader, encheader, reverseset, encodingxfer, nil);
{ Have to reverse the translation, as this is for local->line }
encodingset:= '';
for i:= 1 to 255 do
encodingset:= encodingset + chr(i);
{ If no header in message, then reverseset is empty }
if (reverseset <> '') then
for i:= 1 to 255 do
encodingset[ord(reverseset[i])] := chr(i);
end;
procedure loadcharsets;
type
wanted_type = (name_wanted, trans_wanted);
var
f: text;
l: string;
newset: charset_ptr;
p: charset_ptr;
w: wanted_type;
token: string;
i, err, v1, v2: integer;
begin
w := name_wanted;
safereset(f,fn);
if (fileresult > 0) then
begin
xwritelnss('Cannot open characterset file ', fn);
shutdown(1);
end;
while not eof(f) do
begin
readln(f, l);
token:= chopfirstw(l);
if (length(token) > 0) then
begin
if (token[1] <> '#') then
begin
case w of
name_wanted:
begin
new(newset);
newset^.name:= token;
newset^.cnvtable:= '';
for i:= 1 to 255 do
newset^.cnvtable:= newset^.cnvtable + chr(i);
newset^.next:= charsets;
charsets:= newset;
w := trans_wanted;
notquietlnss('Charset loaded: ', newset^.name);
repeat
token := lower(chopfirstw(l));
if (token = '/post') then
begin
postingsetname:= newset^.name;
notquietlns(' - used for outgoing posts');
end
else if (copy(token, 1, 5) = '/mail') then
begin
mailingsetname := newset^.name;
notquietlns(' - used for outgoing mail');
token:= copy(token, 7, 255);
if (token = '7bit') then
mailxfername := '7bit'
else if (token = '8bit') then
mailxfername := '8bit'
else if (token = 'quoted') then
mailxfername := 'quoted-printable'
else if (token = 'quoted-safe') then
begin
{ Extra safe quoted-printable }
{ Will survive EBCDIC gateways unscathed }
mailxfername := 'quoted-printable';
mailsafechars := ['A'..'Z', 'a'..'z', '0'..'9',
#39..#41, #43..#47, #58, #63];
notquietlns(
' - (enabling EBCDIC safe quoted-printable encoding)');
end
else
begin
xwritelnss('unknown mailset transferencoding: ',
token);
shutdown(1);
end;
notquietlnss(' - mail transfer-encoding: ', mailxfername);
end
else if (token = '/default') then
begin
defaultset := newset;
notquietlns(' - used as default for reading posts');
end
else if (token <> '') then
begin
xwritelnss('unknown charset option:', token);
shutdown(1);
end;
until (token = '');
end;
trans_wanted:
begin
if (lower(token) = 'end') then
w := name_wanted
else
begin
i:= pos('=', token);
if ((i = 0) or (i = length(token))) then
begin
xwritelnss('Bad charset conversion', token);
shutdown(1);
end;
val(copy(token, 1, i-1), v1, err);
if (err = 0) then
val(copy(token, i+1, length(token)-i), v2, err);
if (err <> 0) then
begin
xwritelnss('Bad charset conversion: ', token);
shutdown(1);
end;
{
(
not
(
v1 in [1..255]
)
)
and
(
not
(
v2 in [1..255]
)
)
}
if ((not (v1 in [1..255])) and (not (v2 in [1..255]))) then
begin
xwritelnss('Bad charset conversion, must be 1..255 : ',
token);
shutdown(1);
end;
newset^.cnvtable[v1] := chr(v2);
end;
end;
end; { case }
end; { not comment }
end; { not blank line }
end; { while not eof }
close(f);
end;
procedure linetolocal;
var
i: integer;
begin
{ First, process the transferencoding }
transferdecode(s, decodingxfer);
{ Next, apply any charactersets specified }
if (decodingset <> '') then
for i:= 1 to length(s) do
s[i]:= decodingset[ord(s[i])];
end;
procedure localtoline;
var
i: integer;
begin
{ Apply character set encoding }
if (encodingset <> '') then
for i:= 1 to length(s) do
s[i]:= encodingset[ord(s[i])];
{ Apply transfer-encoding }
transferencode(s, encodingxfer);
end;
begin { Module init code }
charsets := nil;
defaultset := nil;
postingsetname := 'us-ascii';
mailingsetname := 'us-ascii';
mailxfername := '7bit';
mailsafechars := [#33..#60, #62..#126];
end. { Module ends }