Columbia Kermit

home *** CD-ROM | disk | FTP | other *** search

/ Columbia Kermit / kermit.zip / mm / mm-ccmd-0.91.tar.Z / mm-ccmd-0.91.tar / work / mm / token.c < prev next >

Wrap

C/C++ Source or Header | 1990-12-18 | 16KB | 869 lines

/* * Copyright (c) 1986, 1990 by The Trustees of Columbia University in * the City of New York. Permission is granted to any individual or * institution to use, copy, or redistribute this software so long as it * is not sold for profit, provided this copyright notice is retained. */ #ifndef lint static char *rcsid = "$Header: /f/src2/encore.bin/cucca/mm/tarring-it-up/RCS/token.c,v 2.1 90/10/04 18:26:55 melissa Exp $"; #endif /* * token.c - rudimentary RFC822 address parsing */ #define MAIL11 /* for VMS type addresses */ #define ALLOWDOT /* allow dots in some extra places */ /* #define NOTMM */ /* use to get rid of MM dependencies */ #include <stdio.h> #include "token.h" #define _CHARTYPE_ARRAY_ /* necessary for chartype.h */ #ifdef NOTMM #include "chartype.h" #else #include "mm.h" #include "address.h" #endif /* NOTMM */ /* * token struct used for parsing rfc822 address lists */ typedef struct token { unsigned char type; /* token type */ unsigned char ptype; /* parse type */ unsigned short len; /* token string length */ unsigned short plen; /* parse type length in tokens */ short clen; /* length of following comment */ char *text; /* pointer to static text */ char *ctext; /* text of following comment */ struct token *next; /* pointer to next token */ } token; #ifdef TEST #define add_addresslist(a,b,c) #endif /* * test for whether we're looking at whitespace -- should be a macro */ folding (s) char *s; { while (isblank (*s)) /* skip tabs, spaces */ ++s; if (*s=='\r') ++s; if (*s == '\n' && isblank (*++s)) /* newline followed by whitespace? */ return 1; /* yes, line continuation */ return 0; } /* * given a pointer to text after an opening '(', returns a pointer to the * character following the matching ')' (or a null if the comment wasn't * terminated properly). */ char * eatcomment(s) char *s; { int parencount = 1; if (*s == '(') ++s; while (*s) { switch (*s) { case '\\': if (*++s) break; /* skip unless null */ return s; case '(': ++parencount; /* parens nest */ break; case ')': if (--parencount == 0) return ++s; break; case '\r': case '\n': /* check for continuation */ if (!folding (s)) return s; /* bad continuation line */ break; default: if (iscntrl(*s)) return s; /* return pointer to null */ } ++s; } return s; /* return pointer to null */ } /* * given a pointer to an opening '"', return a pointer to the char following * the closing '"'. */ char * eatqst(s) char *s; { while (*++s) { switch (*s) { case '\\': ++s; break; case '"': return ++s; case '\n': if (!isblank(s[1])) return s; /* not a continuation line */ break; case '\0': return s; } } } /* * allocate a token struct and fill in the type and value */ token * alloc_token (type, text, len) int type, len; char *text; { register token *t; char * calloc (); if (t = ((token *) calloc (1, sizeof (token)))) { t->type = type; t->text = text; t->len = len; } return t; } /* * break a null-terminated string into a list of tokens and return it. * basically what you'd expect, except that any paren-delimited comment * string is attached as a unit to the previous token, if any. Such * comments are lost if they appear before any "significant" tokens, but * I don't think that's much to worry about. */ token * lex (s) char *s; { register char *p = NULL; token head, *tail, *t = NULL; tail = head.next = &head; while (*s && tail) { while (isblank (*s)) { do ++s; while (isblank (*s)); if (*s == '\n') { if (folding (++s)) continue; else { /* * data pointer in T_EOH token points to unparsed text */ tail->next = alloc_token (T_EOH, s, 0); tail = tail->next; tail->next = NULL; return head.next; } } } if (!*s && (p == 0)) return NULL; p = s; /* save start of string */ if (isatom (*s) #ifdef ALLOWDOT || *s == '.' #endif ) { s++; while (isatom (*s) #ifdef ALLOWDOT || *s == '.' #endif ) s++; t = alloc_token (T_ATOM, p, (int) (s-p)); } else if (isspecial(*s)) { switch (*s) { case '(': s = eatcomment(s); tail->ctext = p; tail->clen = (s - p); continue; case ')': t = alloc_token (T_RPAREN, s, 1); break; case '<': t = alloc_token (T_LROUTE, s, 1); break; case '>': t = alloc_token (T_RROUTE, s, 1); break; case '@': t = alloc_token (T_AT, s, 1); break; case ',': if (tail->type == T_COMMA) { ++s; /* multiple commas are allowed, but */ continue; /* aren't meaningful */ } t = alloc_token (T_COMMA, s, 1); break; case ';': t = alloc_token (T_SEMI, s, 1); break; case ':': #ifdef MAIL11 if (*(s+1) == ':') { t = alloc_token(T_COLCOL, s, 2); s++; } else #endif t = alloc_token (T_COLON, s, 1); break; case '\\': if (*++s) t = alloc_token (T_QPAIR, p, 2); else return NULL; break; case '"': s = eatqst (s); t = alloc_token (T_QSTR, p, (int) (s-p)); --s; break; case '.': t = alloc_token (T_DOT, s, 1); break; case '[': t = alloc_token (T_LDOMLIT, s, 1); break; case ']': t = alloc_token (T_RDOMLIT, s, 1); } ++s; } else if (*s) ++s; /* ignore the character */ tail = (tail->next = t); } if (tail) tail->next = NULL; return head.next; } /* * advance a pointer along a token chain - should probably be a macro */ token * advance (t, n) token *t; int n; { #if TEST > 1 char *untoken (); char *p = untoken (t, n, ' ', 0); if (p) { printf ("advancing past %d tokens '%s'\n", n, p); free (p); } #endif while (t && (n-- > 0)) t = t->next; return t; } /* * turn a token list back into an ascii string. the string returned should * be released with free(). */ char * untoken (t, n, stripcomments, dofree) token *t; int n, stripcomments, dofree; { char *p, *cp, *malloc (); int i, len = 0; token *head = t; for (i = 0; t && (i < n); i++, t = t->next) { len += t->len; if (t->clen && !stripcomments) len += t->clen + 1; } len += n; /* count delimiters, trailing null */ p = cp = malloc (len+1); /* get the space */ if (p) { for (t = head, i = n; t && (i > 0); i--) { strncpy (cp, t->text, t->len); cp += t->len; if (t->clen && !stripcomments) { *cp = ' '; strncpy (++cp, t->ctext, t->clen); cp += t->clen; if (t->next && isspace(t->ctext[t->clen])) *cp++ = ' '; } else if (!stripcomments && t->next && isblank(t->text[t->len])) *cp++ = ' '; if (dofree) { token *old = t; t = t->next; free (old); } else t = t->next; } *cp = 0; } return p; } /* * are we looking at an RFC822 "phrase"? */ int phrase (t) token *t; { int len = 0; while (t && (t->type == T_QSTR || t->type == T_ATOM)) { t = advance (t, 1); ++len; } return len; } /* * try to parse a domain name (one or more dot-delimited atoms) */ int domain (t) token *t; { int n = 0, needdot = 0; if (t && (t->type == T_LDOMLIT)) return domlit (t); while (t) { if ((needdot && t->type == T_DOT) || (!needdot && t->type == T_ATOM)) needdot = ~needdot; else break; t = advance (t, 1); ++n; } if (n && !needdot) --n; return n; } /* * parse a domain literal, e.g. "[128.59.16.20]" * the argument must be a pointer to a "[" token. */ int domlit (t) token *t; { int n; if (t->type != T_LDOMLIT || (t = t->next) == NULL) return 0; n = domain (t); t = advance (t, n); if (t && t->type == T_RDOMLIT) return n + 2; return 0; } /* * parse a "local-part" of an rfc822 mailbox, consisting of a dot-delimited * list of quoted-strings and/or atoms. */ int localpart (t) token *t; { int n = 0, needdot = 0; while (t) { if ((needdot && t->type == T_DOT) || (!needdot && (t->type == T_QSTR || t->type == T_ATOM))) { needdot = ~needdot; n++; } else break; t = t->next; } if (n && !needdot) --n; /* don't swallow trailing dot */ return n; } /* * parse an RFC822 addr-spec -- "localpart@domain" */ int addrspec (t) token *t; { token *head = t; int n, len = 0; if (!t) return 0; len += (n = localpart (t)); if (n == 0) return 0; t = advance (t, n); if (t && t->type == T_AT) { len += 1; if (t = advance (t, 1)) { if (n = domain (t)) len += n; else if (n = domlit (t)) len += n; } else return 0; } head->ptype = T_ADDRSPEC; head->plen = len; return len; } /* * parse a route, e.g. "@domain,...@domain:" */ int route (t) token *t; { int n = 0, len = 0; while (t && t->type == T_AT && (t = advance (t, 1))) { if ((n = domain (t)) || (n = domlit (t))) { t = advance (t, n); len += n + 2; /* commit to next token */ if (t->type == T_COLON) return len; else if (t->type == T_COMMA) { t = advance (t, 1); continue; } break; } } return 0; } /* * parse a routeaddr -- "<@domain,...,domain:localpart@domain>" */ int routeaddr (t) token *t; { int n, len = 1; if (t) { if (t->type != T_LROUTE) return 0; if ((t = t->next) == NULL) return 0; if (t->type == T_AT) { len += (n = route (t)); if (n == 0 || ((t = advance (t, n)) == NULL)) return 0; } len += (n = addrspec (t)); if (n == 0 || ((t = advance (t, n)) == NULL)) return 0; if (t->type == T_RROUTE) return (len + 1); } return 0; } int group (t) token *t; { int n, len = 0; token *tp = t; if ((n = phrase (t)) > 0) { if (t = advance (t, n)) { if (t->type != T_COLON) return 0; len += n; t = advance (t, 1); len += 1; for (;;) { if (n = mailbox (t)) { t = advance (t, n); len += n; if (t && (t->type == T_COMMA)) { len += 1; t = advance (t, 1); continue; } } break; } if (t && (t->type == T_SEMI)) { tp->ptype = T_GROUPLIST; tp->plen = ++len; t->ptype = T_GROUPEND; t->plen = 1; return len; } } } return 0; } #ifdef MAIL11 /* * parse mail11 addresses * or at least the hostname:: part. */ int mail11_mailbox (t) token *t; { int n; token *head = t; if (t->type == T_ATOM) { t = advance(t,1); if (t && (t->type == T_COLCOL)) { t = advance(t,1); n = addrspec(t); if (n == 0) return(0); head->ptype = T_MAIL11; head->plen = n + 2; return(n + 2); /* addr_spec + hostname + "::" */ } } return(0); } #endif /* * Parse "phrase route-addr" or "addrspec". */ int rfc822_mailbox (t) token *t; { token *head = t; int n, len; if (n = phrase(t)) { len = n; t = advance (t, n); /* skip past it */ } if (n = routeaddr (t)) { head->ptype = T_PHRASEADDR; head->plen = len + n; return head->plen; /* if followed by route-addr, done */ } return addrspec (head); /* see if it's an addrspec */ } int mailbox (t) token *t; { int n; #ifdef MAIL11 if (n = mail11_mailbox(t)) return(n); #endif if (n = rfc822_mailbox(t)) return(n); return(0); } static int addrlist (t) token *t; { token *tp; int n, naddrs = 0; while (t) { tp = t; if (n = group (t)) { ++naddrs; if (t = advance (t, n)) { if (t->type == T_COMMA) { t->ptype = T_COMMA; t->plen = 1; t = advance (t, 1); continue; } } else break; } else if (n = mailbox (t)) { ++naddrs; if (t = advance (t, n)) { if (t->type == T_COMMA) { t->ptype = T_COMMA; t->plen = 1; t = advance (t, 1); continue; } } else break; } if (t == NULL || t->type == T_EOH) return naddrs; /* parse problem - mark invalid tokens and try to continue */ tp->ptype = T_IGNORE; /* mark token string bad */ tp->plen = 1; t = tp->next; /* munch tokens till we find comma or end of string */ while (t && (t->type != T_COMMA) && (t->type != T_EOH)) { ++tp->plen; t = advance (t, 1); } /* eat following comma */ if (t && (t->type == T_COMMA)) { t = advance (t, 1); } } return naddrs; } char * unspace(str) char *str; { char *cp; while(isspace(*str)) str++; cp = str + strlen(str) - 1; while(isspace(*cp)) *cp-- = '\0'; return(str); } #ifndef NOTMM match_addresses (a, buf, len) char **buf; addresslist *a; int len; { int n; token *t, *newt; token *t2; char *p,*cp; if (strlen (*buf) < 1) return; t = lex (*buf); if ((t == 0) || (t->type == T_EOH)) return; for (n = 0, t2 = t; t2; t2 = t2->next) ++n; /* * Note that the untoken calls free the address token structs, * hence the use of newt to step through the list. */ if (addrlist (newt = t)) { int n; token *t1; while (t = newt) { switch (t->ptype) { case T_GROUPLIST: for (n = 1, t1 = advance(t,1); t1->ptype == T_NONE; t1 = advance(t1,1),n++) ; newt = advance (t, n); add_addresslist(a, unspace(untoken(t,n - 1,0,1)), ADR_GROUP); break; case T_GROUPEND: newt = advance (t, 1); add_addresslist(a, unspace(untoken(t,1,0,1)), ADR_GROUPEND); break; #ifdef MAIL11 case T_MAIL11: #endif case T_ADDRSPEC: case T_PHRASEADDR: newt = advance (t, t->plen); cp = unspace(untoken(t,t->plen,0,1)); #ifndef TEST if (strcmp(cp,".") == 0) add_addresslist(a,user_name,ADR_ADDRESS); else if (*cp == '*') add_addresslist(a,tilde_expand(cp+1),ADR_FILE); else if (lookup_alias(cp)) add_addresslist(a,cp,ADR_ALIAS); else #endif add_addresslist(a,cp,ADR_ADDRESS); break; case T_IGNORE: newt = advance (t, (t->plen ? t->plen : 1)); cp = unspace(untoken (t, t->plen, 0, 1)); if (use_address(cp)) add_addresslist(a, cp, ADR_ADDRESS); break; default: newt = t->next; #ifdef TEST if (t->type != T_COMMA) printf ("unknown token \"%s\"\n", untoken (t, 1, 0, 1)); #endif break; } } } } use_address(str) char *str; { #ifndef TEST extern use_invalid_address; switch(use_invalid_address) { case SET_YES: return(true); case SET_NO: printf("Invalid address: \"%s\"\n", str); return(false); case SET_ASK: printf("Invalid address: \"%s\"\n", str); return(yesno("Use anyway? ")); } #else printf("Invalid address: \"%s\"\n", str); return(0); #endif } #if TEST main(argc,argv) int argc; char **argv; { addresslist a; char *buf; buf = (char *)malloc(512); a.first = a.last = NULL; while (fgets (buf, 512, stdin) != NULL) { match_addresses(&a,&buf,strlen(buf)); } } #endif /* TEST */ #else /* NOTMM */ #if TEST main (argc, argv) int argc; char *argv[]; { int n; token *t, *newt; char buffer[512]; char *p; while ((p = fgets (buffer, sizeof (buffer), stdin)) != NULL) { if (strlen (buffer) < 1) continue; t = lex (buffer); if ((t == 0) || (t->type == T_EOH)) continue; { token *t2; for (n = 0, t2 = t; t2; t2 = t2->next) ++n; printf ("n = %d, tokens = %s\n", n, untoken (t, n, 0, 0)); } /* * Note that the untoken calls free the address token structs, * hence the use of newt to step through the list. */ if (addrlist (newt = t)) { while (t = newt) { switch (t->ptype) { case T_GROUPLIST: newt = advance (t, t->plen); printf ("group = %s\n", untoken (t, t->plen, 0, 1)); break; #ifdef MAIL11 case T_MAIL11: #endif case T_ADDRSPEC: case T_PHRASEADDR: newt = advance (t, t->plen); printf ("address = %s\n", untoken (t, t->plen, 0, 1)); break; case T_IGNORE: newt = advance (t, (t->plen ? t->plen : 1)); printf ("bad tokens: %s\n", untoken (t, t->plen, 0, 1)); break; default: newt = t->next; if (t->type != T_COMMA) printf ("unknown token \"%s\"\n", untoken (t, 1, 0, 1)); break; } } } else printf ("no addresses found\n"); } exit (0); } #endif /* TEST */ #endif NOTMM