home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Columbia Kermit
/
kermit.zip
/
mm
/
mm-ccmd-0.91.tar.Z
/
mm-ccmd-0.91.tar
/
work
/
mm
/
token.c
< prev
next >
Wrap
C/C++ Source or Header
|
1990-12-18
|
16KB
|
869 lines
/*
* Copyright (c) 1986, 1990 by The Trustees of Columbia University in
* the City of New York. Permission is granted to any individual or
* institution to use, copy, or redistribute this software so long as it
* is not sold for profit, provided this copyright notice is retained.
*/
#ifndef lint
static char *rcsid = "$Header: /f/src2/encore.bin/cucca/mm/tarring-it-up/RCS/token.c,v 2.1 90/10/04 18:26:55 melissa Exp $";
#endif
/*
* token.c - rudimentary RFC822 address parsing
*/
#define MAIL11 /* for VMS type addresses */
#define ALLOWDOT /* allow dots in some extra places */
/* #define NOTMM */ /* use to get rid of MM dependencies */
#include <stdio.h>
#include "token.h"
#define _CHARTYPE_ARRAY_ /* necessary for chartype.h */
#ifdef NOTMM
#include "chartype.h"
#else
#include "mm.h"
#include "address.h"
#endif /* NOTMM */
/*
* token struct used for parsing rfc822 address lists
*/
typedef struct token {
unsigned char type; /* lexical type given by lex(), e.g. T_ATOM, T_COMMA */
unsigned char ptype; /* parse type, set by the parser on the first token of a construct */
unsigned short len; /* length of text in characters (text is not null-terminated here) */
unsigned short plen; /* number of tokens covered by the construct named in ptype */
short clen; /* length of following comment, 0 when there is none */
char *text; /* start of the token's text; points into the string that was lexed */
char *ctext; /* start of the comment following the token, inside the same string */
struct token *next; /* pointer to next token, null at the end of the list */
} token;
#ifdef TEST
/* in TEST builds every add_addresslist() call expands to nothing */
#define add_addresslist(a,b,c)
#endif
/*
 * test for a folded (continued) line break.  Skips any tabs and spaces
 * at s and one optional '\r'; returns 1 if a '\n' comes next and the
 * character after it is a tab or space, 0 otherwise.  The caller's
 * pointer is not changed.  Should probably be a macro.
 */
int
folding (s)
char *s;
{
    while (isblank (*s))		/* skip tabs, spaces */
	++s;
    if (*s == '\r')			/* tolerate a CR before the newline */
	++s;
    if (*s != '\n')
	return 0;			/* not looking at a line break */
    return isblank (s[1]) ? 1 : 0;	/* continuation iff next line is indented */
}
/*
 * scan an RFC822 comment.  s points at the opening '(' or at the first
 * character after it.  Returns a pointer to the character following the
 * matching ')'.  If the comment isn't terminated properly the scan stops
 * early and the result points at the offending character instead: the
 * terminating null, a line break that is not a continuation, or a
 * control character other than tab.  A null pointer is never returned.
 */
char *
eatcomment(s)
char *s;
{
    int depth = 1;			/* the opening paren is already counted */

    if (*s == '(')
	++s;
    for (; *s; ++s) {
	switch (*s) {
	case '\\':			/* quoted pair: skip the next character */
	    if (*++s == '\0')
		return s;		/* backslash was the last character */
	    break;
	case '(':
	    ++depth;			/* parens nest */
	    break;
	case ')':
	    if (--depth == 0)
		return s + 1;
	    break;
	case '\r':
	case '\n':			/* only legal as part of a folded line */
	    if (!folding (s))
		return s;		/* bad continuation line */
	    break;
	default:
	    /*
	     * white space is legal comment text, so a tab must not end
	     * the comment; any other control character does.
	     */
	    if (iscntrl (*s) && !isblank (*s))
		return s;
	    break;
	}
    }
    return s;				/* hit the terminating null */
}
/*
 * given a pointer to an opening '"', return a pointer to the char following
 * the closing '"'.  If the string is never closed the scan stops early and
 * the result points at the terminating null, or at a '\n' that is not
 * followed by a tab or space (i.e. not a continuation line).
 */
char *
eatqst(s)
char *s;
{
    while (*++s) {
	switch (*s) {
	case '\\':			/* quoted pair: step over the escaped char */
	    if (s[1] == '\0')
		return s + 1;		/* dangling backslash: stop at the null */
	    ++s;
	    break;
	case '"':
	    return s + 1;
	case '\n':
	    if (!isblank (s[1]))
		return s;		/* not a continuation line */
	    break;
	}
    }
    return s;				/* unterminated: s is at the null */
}
/*
 * make a new zero-filled token of the given type whose text is the len
 * characters starting at text.  The text is referenced, not copied.
 * Returns the token, or a null pointer when calloc() fails.
 */
token *
alloc_token (type, text, len)
int type, len;
char *text;
{
    char * calloc ();
    register token *fresh;

    fresh = (token *) calloc (1, sizeof (token));
    if (!fresh)
	return fresh;			/* out of memory */
    fresh->type = type;
    fresh->text = text;
    fresh->len = len;
    return fresh;
}
/*
 * break a null-terminated string into a list of tokens and return it.
 * basically what you'd expect, except that any paren-delimited comment
 * string is attached as a unit to the previous token, if any.  Such
 * comments are lost if they appear before any "significant" tokens, but
 * I don't think that's much to worry about.
 *
 * Tokens do not copy their text: each one points into s, so s must stay
 * valid for as long as the list is in use.
 *
 * Returns the first token, or a null pointer if s yields no tokens or
 * ends right after a backslash (tokens built up to that point are not
 * released).  If alloc_token() fails, the tokens built so far are
 * returned.  A T_EOH token ends the list early.  Characters that are
 * neither atom characters nor specials are skipped.
 */
token *
lex (s)
char *s;
{
    register char *p = NULL;
    token head, *tail, *t = NULL;

    /*
     * head is a dummy list header on the stack.  Its type is examined by
     * the ',' case below while no real token exists yet, so it has to be
     * given a value; marking it as a comma makes leading commas collapse
     * the same way repeated commas do.
     */
    head.type = T_COMMA;
    tail = head.next = &head;
    while (*s && tail) {
	while (isblank (*s)) {
	    do ++s; while (isblank (*s));
	    if (*s == '\n') {
		/*
		 * NOTE(review): folding() is handed the text that follows
		 * the newline here, whereas eatcomment() calls it with a
		 * pointer at the newline itself -- confirm which one is
		 * intended.
		 */
		if (folding (++s))
		    continue;
		/*
		 * data pointer in T_EOH token points to unparsed text
		 */
		t = alloc_token (T_EOH, s, 0);
		if (t)			/* don't touch a failed allocation */
		    t->next = NULL;
		tail->next = t;
		return head.next;
	    }
	}
	if (!*s && (p == 0))
	    return NULL;		/* nothing but white space */
	p = s;				/* save start of string */
	if (isatom (*s)
#ifdef ALLOWDOT
	    || *s == '.'
#endif
	    ) {
	    s++;
	    while (isatom (*s)
#ifdef ALLOWDOT
		   || *s == '.'
#endif
		   )
		s++;
	    t = alloc_token (T_ATOM, p, (int) (s-p));
	}
	else if (isspecial(*s)) {
	    switch (*s) {
	    case '(':
		/* hang the comment on the previous token (or on head) */
		s = eatcomment(s);
		tail->ctext = p;
		tail->clen = (s - p);
		continue;
	    case ')':
		t = alloc_token (T_RPAREN, s, 1);
		break;
	    case '<':
		t = alloc_token (T_LROUTE, s, 1);
		break;
	    case '>':
		t = alloc_token (T_RROUTE, s, 1);
		break;
	    case '@':
		t = alloc_token (T_AT, s, 1);
		break;
	    case ',':
		if (tail->type == T_COMMA) {
		    ++s;		/* multiple commas are allowed, but */
		    continue;		/* aren't meaningful */
		}
		t = alloc_token (T_COMMA, s, 1);
		break;
	    case ';':
		t = alloc_token (T_SEMI, s, 1);
		break;
	    case ':':
#ifdef MAIL11
		if (*(s+1) == ':') {
		    t = alloc_token(T_COLCOL, s, 2);
		    s++;
		}
		else
#endif
		    t = alloc_token (T_COLON, s, 1);
		break;
	    case '\\':
		if (*++s)
		    t = alloc_token (T_QPAIR, p, 2);
		else
		    return NULL;
		break;
	    case '"':
		s = eatqst (s);
		t = alloc_token (T_QSTR, p, (int) (s-p));
		--s;			/* compensate for the ++s below */
		break;
	    case '.':
		t = alloc_token (T_DOT, s, 1);
		break;
	    case '[':
		t = alloc_token (T_LDOMLIT, s, 1);
		break;
	    case ']':
		t = alloc_token (T_RDOMLIT, s, 1);
		break;
	    default:
		/* a special with no token type of its own: ignore it */
		++s;
		continue;
	    }
	    ++s;
	}
	else {
	    /*
	     * ignore the character.  No token was made, so the list must
	     * not be touched: linking the stale t here would end the scan
	     * with an empty result if no token had been built yet.
	     */
	    if (*s)
		++s;
	    continue;
	}
	tail = (tail->next = t);	/* a null t (no memory) ends the loop */
    }
    if (tail)
	tail->next = NULL;
    return head.next;
}
/*
 * step n tokens down a token chain and return the token reached, or a
 * null pointer if the chain runs out first.  With TEST > 1 the tokens
 * being skipped are printed.  Should probably be a macro.
 */
token *
advance (t, n)
token *t;
int n;
{
#if TEST > 1
    char *untoken ();
    char *skipped = untoken (t, n, ' ', 0);
    if (skipped) {
	printf ("advancing past %d tokens '%s'\n", n, skipped);
	free (skipped);
    }
#endif
    for (; t && (n > 0); n--)
	t = t->next;
    return t;
}
/*
 * rebuild an ascii string from the first n tokens of a list.  With
 * stripcomments zero, attached comments are copied too and a blank is
 * put back wherever the original text had white space after a token;
 * with stripcomments nonzero neither happens.  With dofree nonzero each
 * token is released as soon as it has been copied.
 *
 * Returns a malloc()ed string the caller should release with free(), or
 * a null pointer if no memory was available (in which case no tokens
 * are released).
 */
char *
untoken (t, n, stripcomments, dofree)
token *t;
int n, stripcomments, dofree;
{
    char *buf, *out, *malloc ();
    int count, total = 0;
    token *cur, *following;

    /* first pass: work out how much room is needed */
    for (cur = t, count = 0; cur && (count < n); count++, cur = cur->next) {
	total += cur->len;
	if (cur->clen && !stripcomments)
	    total += cur->clen + 1;
    }
    total += n;				/* count delimiters, trailing null */
    buf = malloc (total+1);		/* get the space */
    if (!buf)
	return buf;

    /* second pass: copy the pieces */
    out = buf;
    for (cur = t, count = n; cur && (count > 0); count--, cur = following) {
	following = cur->next;		/* remember it: cur may be freed below */
	strncpy (out, cur->text, cur->len);
	out += cur->len;
	if (cur->clen && !stripcomments) {
	    *out++ = ' ';
	    strncpy (out, cur->ctext, cur->clen);
	    out += cur->clen;
	    if (following && isspace(cur->ctext[cur->clen]))
		*out++ = ' ';
	}
	else if (!stripcomments && following && isblank(cur->text[cur->len]))
	    *out++ = ' ';
	if (dofree)
	    free (cur);
    }
    *out = 0;
    return buf;
}
/*
 * are we looking at an RFC822 "phrase"?  Returns the number of leading
 * tokens that are atoms or quoted strings (0 if there are none).
 */
int
phrase (t)
token *t;
{
    int words;

    for (words = 0; t; ++words) {
	if (t->type != T_QSTR && t->type != T_ATOM)
	    break;
	t = advance (t, 1);
    }
    return words;
}
/*
 * try to parse a domain name (one or more dot-delimited atoms), or a
 * domain literal if the first token is a '['.  Returns the number of
 * tokens matched, 0 if none; a trailing dot is not counted.
 */
int
domain (t)
token *t;
{
    int count, wantdot;

    if (t && (t->type == T_LDOMLIT))
	return domlit (t);
    count = 0;
    wantdot = 0;			/* an atom has to come first */
    for (; t; t = advance (t, 1), ++count) {
	if (t->type != (wantdot ? T_DOT : T_ATOM))
	    break;
	wantdot = !wantdot;		/* atoms and dots alternate */
    }
    if (count && !wantdot)
	--count;			/* ended on a dot: give it back */
    return count;
}
/*
 * parse a domain literal, e.g. "[128.59.16.20]"
 * the argument must be a pointer to a "[" token (it is dereferenced
 * without a null check).  Returns the number of tokens in the literal
 * including both brackets, or 0 if the closing "]" is missing.
 */
int
domlit (t)
token *t;
{
    int inner;

    if (t->type != T_LDOMLIT)
	return 0;
    t = t->next;
    if (t == NULL)
	return 0;
    inner = domain (t);			/* what sits between the brackets */
    t = advance (t, inner);
    if (!t || t->type != T_RDOMLIT)
	return 0;
    return inner + 2;
}
/*
 * parse a "local-part" of an rfc822 mailbox, consisting of a dot-delimited
 * list of quoted-strings and/or atoms.  Returns the number of tokens
 * matched, 0 if none.
 */
int
localpart (t)
token *t;
{
    int count = 0, wantdot = 0;

    for (; t; t = t->next) {
	if (wantdot) {
	    if (t->type != T_DOT)
		break;
	}
	else if (t->type != T_QSTR && t->type != T_ATOM)
	    break;
	wantdot = !wantdot;		/* words and dots alternate */
	count++;
    }
    if (count && !wantdot)
	--count;			/* don't swallow trailing dot */
    return count;
}
/*
* parse an RFC822 addr-spec -- "localpart@domain"
*/
int
addrspec (t)
token *t;
{
token *head = t;
int n, len = 0;
if (!t)
return 0;
len += (n = localpart (t));
if (n == 0)
return 0;
t = advance (t, n);
if (t && t->type == T_AT) {
len += 1;
if (t = advance (t, 1)) {
if (n = domain (t))
len += n;
else if (n = domlit (t))
len += n;
}
else
return 0;
}
head->ptype = T_ADDRSPEC;
head->plen = len;
return len;
}
/*
* parse a route, e.g. "@domain,...@domain:"
*/
int
route (t)
token *t;
{
int n = 0, len = 0;
while (t && t->type == T_AT && (t = advance (t, 1))) {
if ((n = domain (t)) || (n = domlit (t))) {
t = advance (t, n);
len += n + 2; /* commit to next token */
if (t->type == T_COLON)
return len;
else if (t->type == T_COMMA) {
t = advance (t, 1);
continue;
}
break;
}
}
return 0;
}
/*
 * parse a routeaddr -- "<@domain,...,domain:localpart@domain>"
 * The route part is optional.  Returns the number of tokens from '<'
 * through '>' inclusive, or 0 if the tokens do not form a route-addr.
 */
int
routeaddr (t)
token *t;
{
    int n, len = 1;			/* 1 for the '<' */

    if (!t || t->type != T_LROUTE)
	return 0;
    t = t->next;
    if (t == NULL)
	return 0;
    if (t->type == T_AT) {		/* optional source route */
	n = route (t);
	len += n;
	if (n == 0)
	    return 0;
	t = advance (t, n);
	if (t == NULL)
	    return 0;
    }
    n = addrspec (t);
    len += n;
    if (n == 0)
	return 0;
    t = advance (t, n);
    if (t == NULL)
	return 0;
    return (t->type == T_RROUTE) ? (len + 1) : 0;
}
/*
 * parse a group -- "phrase: mailbox, mailbox, ... ;"
 * On success the first token is marked T_GROUPLIST with plen set to the
 * total token count, the ';' token is marked T_GROUPEND, and the count
 * is returned.  Returns 0 if the tokens do not form a group.
 */
int
group (t)
token *t;
{
    token *first = t;
    int n, len;

    n = phrase (t);
    if (n <= 0)
	return 0;
    t = advance (t, n);
    if (!t || t->type != T_COLON)
	return 0;
    len = n + 1;			/* the phrase plus the ':' */
    t = advance (t, 1);
    while ((n = mailbox (t)) != 0) {
	t = advance (t, n);
	len += n;
	if (!t || t->type != T_COMMA)
	    break;
	len += 1;			/* the ',' between members */
	t = advance (t, 1);
    }
    if (!t || t->type != T_SEMI)
	return 0;
    first->ptype = T_GROUPLIST;
    first->plen = ++len;		/* include the ';' */
    t->ptype = T_GROUPEND;
    t->plen = 1;
    return len;
}
#ifdef MAIL11
/*
* parse mail11 addresses
* or at least the hostname:: part.
*/
int
mail11_mailbox (t)
token *t;
{
int n;
token *head = t;
if (t->type == T_ATOM) {
t = advance(t,1);
if (t && (t->type == T_COLCOL)) {
t = advance(t,1);
n = addrspec(t);
if (n == 0)
return(0);
head->ptype = T_MAIL11;
head->plen = n + 2;
return(n + 2); /* addr_spec + hostname + "::" */
}
}
return(0);
}
#endif
/*
 * Parse "phrase route-addr" or "addrspec".
 * A route-addr with no phrase in front of it is accepted too.  For the
 * route-addr forms the first token is marked T_PHRASEADDR with plen set
 * to the total token count; otherwise the result is whatever addrspec()
 * makes of the tokens.  Returns the number of tokens matched, 0 if none.
 */
int
rfc822_mailbox (t)
token *t;
{
    token *head = t;
    int n, len = 0;			/* len stays 0 when there is no phrase */

    if ((n = phrase (t)) != 0) {
	len = n;
	t = advance (t, n);		/* skip past it */
    }
    if ((n = routeaddr (t)) != 0) {
	head->ptype = T_PHRASEADDR;
	head->plen = len + n;
	return head->plen;		/* if followed by route-addr, done */
    }
    return addrspec (head);		/* see if it's an addrspec */
}
/*
 * parse one mailbox: a mail11 address first (when MAIL11 is defined),
 * then the RFC822 forms.  Returns the number of tokens matched, 0 if
 * neither form matches.
 */
int
mailbox (t)
token *t;
{
    int n;

#ifdef MAIL11
    n = mail11_mailbox(t);
    if (n != 0)
	return(n);
#endif
    n = rfc822_mailbox(t);
    return(n);
}
/*
 * parse a comma-separated list of groups and mailboxes, marking the
 * first token of each entry with its parse type and length.  A run of
 * tokens that can't be parsed is marked T_IGNORE (up to the next comma
 * or T_EOH) and parsing resumes after it.  Returns the number of groups
 * and mailboxes recognized.
 */
static int
addrlist (t)
token *t;
{
    token *start;
    int n, naddrs = 0;

    while (t) {
	start = t;
	n = group (t);			/* a group takes precedence ... */
	if (n == 0)
	    n = mailbox (t);		/* ... over a plain mailbox */
	if (n != 0) {
	    ++naddrs;
	    t = advance (t, n);
	    if (t == NULL)
		break;
	    if (t->type == T_COMMA) {
		t->ptype = T_COMMA;
		t->plen = 1;
		t = advance (t, 1);
		continue;
	    }
	}
	if (t == NULL || t->type == T_EOH)
	    return naddrs;
	/* parse problem - mark invalid tokens and try to continue */
	start->ptype = T_IGNORE;	/* mark token string bad */
	start->plen = 1;
	/* munch tokens till we find comma or end of string */
	for (t = start->next;
	     t && (t->type != T_COMMA) && (t->type != T_EOH);
	     t = advance (t, 1))
	    ++start->plen;
	/* eat following comma */
	if (t && (t->type == T_COMMA))
	    t = advance (t, 1);
    }
    return naddrs;
}
/*
 * trim white space from both ends of str, in place.  Trailing white
 * space is overwritten with nulls; leading white space is skipped, so
 * the pointer returned may lie past str (don't hand it to free()).
 * A null str is returned unchanged.
 */
char *
unspace(str)
char *str;
{
    int len;

    if (!str)
	return(str);
    while(isspace(*str)) str++;
    /*
     * trim from the end by length so that an empty or all-blank string
     * never makes us look at memory in front of str
     */
    len = strlen(str);
    while(len > 0 && isspace(str[len - 1]))
	str[--len] = '\0';
    return(str);
}
#ifndef NOTMM
/*
 * lex the text at *buf, parse it as an address list, and pass every
 * entry found to add_addresslist() for list a, tagged with its kind
 * (ADR_GROUP, ADR_GROUPEND, ADR_ADDRESS, ADR_FILE or ADR_ALIAS).
 * Nothing is added if the text is empty, if lex() gives no tokens or
 * only a T_EOH, or if addrlist() recognizes no address.
 *
 * The len argument is not used here, and neither are the outer n
 * (a token count that is computed but never read) and p.
 */
match_addresses (a, buf, len)
char **buf;
addresslist *a;
int len;
{
int n;
token *t, *newt;
token *t2;
char *p,*cp;
if (strlen (*buf) < 1)
return;
t = lex (*buf);
if ((t == 0) || (t->type == T_EOH))
return;
for (n = 0, t2 = t; t2; t2 = t2->next)
++n;
/*
* Note that the untoken calls free the address token structs,
* hence the use of newt to step through the list: newt is always
* worked out before untoken() releases the tokens at t.
*/
if (addrlist (newt = t)) {
int n;
token *t1;
while (t = newt) {
switch (t->ptype) {
case T_GROUPLIST:
/*
* count the tokens up to the first one that carries a parse
* type.  NOTE(review): t1 is dereferenced without a null check,
* so this relies on such a token always following.
*/
for (n = 1, t1 = advance(t,1); t1->ptype == T_NONE;
t1 = advance(t1,1),n++)
;
newt = advance (t, n);
/*
* NOTE(review): only n - 1 tokens go to untoken(), so the last of
* the n tokens skipped above is not released here.
*/
add_addresslist(a, unspace(untoken(t,n - 1,0,1)), ADR_GROUP);
break;
case T_GROUPEND:
newt = advance (t, 1);
add_addresslist(a, unspace(untoken(t,1,0,1)), ADR_GROUPEND);
break;
#ifdef MAIL11
case T_MAIL11:
#endif
case T_ADDRSPEC:
case T_PHRASEADDR:
newt = advance (t, t->plen);
/*
* NOTE(review): untoken() returns a null pointer when malloc
* fails; unspace() is called on the result without a check.
*/
cp = unspace(untoken(t,t->plen,0,1));
#ifndef TEST
/* "." is entered as user_name, "*name" as a file, aliases as aliases */
if (strcmp(cp,".") == 0)
add_addresslist(a,user_name,ADR_ADDRESS);
else if (*cp == '*')
add_addresslist(a,tilde_expand(cp+1),ADR_FILE);
else if (lookup_alias(cp))
add_addresslist(a,cp,ADR_ALIAS);
else
#endif
add_addresslist(a,cp,ADR_ADDRESS);
break;
case T_IGNORE:
/* tokens addrlist() could not parse: use_address() decides */
newt = advance (t, (t->plen ? t->plen : 1));
cp = unspace(untoken (t, t->plen, 0, 1));
if (use_address(cp))
add_addresslist(a, cp, ADR_ADDRESS);
break;
default:
/* anything else is stepped over; it is released only in TEST builds */
newt = t->next;
#ifdef TEST
if (t->type != T_COMMA)
printf ("unknown token \"%s\"\n",
untoken (t, 1, 0, 1));
#endif
break;
}
}
}
}
/*
 * decide whether an address that failed to parse should be used anyway,
 * according to the use_invalid_address setting: SET_YES accepts it
 * silently, SET_NO reports and rejects it, SET_ASK reports it and asks.
 * Any other value is handled like SET_NO.  Returns nonzero to use the
 * address.  In TEST builds the address is always reported and rejected.
 */
int
use_address(str)
char *str;
{
#ifndef TEST
    extern int use_invalid_address;

    switch(use_invalid_address) {
    case SET_YES:
	return(true);
    case SET_ASK:
	printf("Invalid address: \"%s\"\n", str);
	return(yesno("Use anyway? "));
    case SET_NO:
    default:
	/* never fall off the end without a value: reject when unsure */
	printf("Invalid address: \"%s\"\n", str);
	return(false);
    }
#else
    printf("Invalid address: \"%s\"\n", str);
    return(0);
#endif
}
#if TEST
/*
 * test driver for the MM build: feed each line of standard input to
 * match_addresses().
 */
main(argc,argv)
int argc;
char **argv;
{
    addresslist list;
    char *line;

    line = (char *)malloc(512);
    list.first = list.last = NULL;
    while (fgets (line, 512, stdin) != NULL)
	match_addresses(&list,&line,strlen(line));
}
#endif /* TEST */
#else /* NOTMM */
#if TEST
/*
 * test driver for the NOTMM build: read lines from standard input, lex
 * and parse each one, and print what was found.  Exits with status 0 at
 * end of input.
 */
main (argc, argv)
int argc;
char *argv[];
{
int n;
token *t, *newt;
char buffer[512];
char *p;
while ((p = fgets (buffer, sizeof (buffer), stdin)) != NULL) {
if (strlen (buffer) < 1)
continue;
t = lex (buffer);
if ((t == 0) || (t->type == T_EOH))
continue;
{
/* show the whole token list first; dofree is 0 so the tokens survive */
token *t2;
for (n = 0, t2 = t; t2; t2 = t2->next)
++n;
printf ("n = %d, tokens = %s\n", n, untoken (t, n, 0, 0));
}
/*
* Note that the untoken calls free the address token structs,
* hence the use of newt to step through the list: newt is always
* worked out before untoken() releases the tokens at t.
* The strings untoken() returns are printed and not released.
*/
if (addrlist (newt = t)) {
while (t = newt) {
switch (t->ptype) {
case T_GROUPLIST:
newt = advance (t, t->plen);
printf ("group = %s\n", untoken (t, t->plen, 0, 1));
break;
#ifdef MAIL11
case T_MAIL11:
#endif
case T_ADDRSPEC:
case T_PHRASEADDR:
newt = advance (t, t->plen);
printf ("address = %s\n", untoken (t, t->plen, 0, 1));
break;
case T_IGNORE:
/* tokens addrlist() could not parse */
newt = advance (t, (t->plen ? t->plen : 1));
printf ("bad tokens: %s\n", untoken (t, t->plen, 0, 1));
break;
default:
/* commas are stepped over silently; anything else is reported */
newt = t->next;
if (t->type != T_COMMA)
printf ("unknown token \"%s\"\n",
untoken (t, 1, 0, 1));
break;
}
}
}
else
printf ("no addresses found\n");
}
exit (0);
}
#endif /* TEST */
#endif NOTMM