PC-Online 1996 May

home *** CD-ROM | disk | FTP | other *** search

/ PC-Online 1996 May / PCOnline_05_1996.bin / linux / source / contrib / smail / smail-3.1 / smail-3 / smail-3.1.28 / src / field.c < prev next >

Wrap

C/C++ Source or Header | 1992-07-11 | 50KB | 1,654 lines

/* @(#)src/field.c 1.6 7/11/92 11:49:10 */ /* * Copyright (C) 1987, 1988 Ronald S. Karr and Landon Curt Noll * Copyright (C) 1992 Ronald S. Karr * * See the file COPYING, distributed with smail, for restriction * and warranty information. */ /* * field.c: * routines to process header fields and alias/.forward files * * The routines defined in this file are not complicated * conceptually. The basic algorithm is tokenize a string, * match patterns of tokens to specific addressing forms, * and insert a comma separator between addresses, if * necessary. * * The pattern matching is made somewhat more complicated * in that when an address is found, it may be modified * to make it somewhat more conforming to standards than * it may have been to begin with. Also, an address may * be extracted and added to a list. * * external functions: process_field, tokenize, detokenize, dump_tokens */ #include <stdio.h> #include <ctype.h> #include "defs.h" #include "smail.h" #include "field.h" #include "addr.h" #include "log.h" #include "dys.h" #include "exitcodes.h" #ifndef DEPEND # include "debug.h" # include "extern.h" #endif /* functions local to this file */ static char *finish_mod_clean(); static void insert_comma(); static int match_route_or_group(); static int match_group_term(); static int match_general(); static char *queue_qualify_domain(); static int enqueue_address(); /* macros local to this file */ #define DUMP_TOKENS(d,t) {if (d <= debug) dump_tokens(t);} /* * tokenize - turn a string into a queue of tokens. * * Given a string, parse the string into a list of tokens. The list * is to be terminated by either an ERRORTOK or an ENDTOK token which * do not have a successor. * * This routine is somewhat long, however, it is basically a state * machine with some initialization at the beginning and cleanup at * the end. * * inputs: * field - string to tokenize. * ret_q - address of a struct token variable in which to * return the head of the queue of tokens. 
* alias - TRUE if # is a comment character and ':include:' * is allowed at the beginning of text tokens. For * use in parsing alias, forward and mailing list * files. * space - TRUE if white space is to be put in space. * output: * an error message is returned on error, or NULL if no error. * Also, the value pointed to by ret_q is filled with the header * of the queue of tokens. * * called by: process_field, external functions */ char * tokenize(field, ret_q, alias, space) char *field; /* string to be tokenized */ struct token **ret_q; /* return start of token queue here */ int alias; /* TRUE if scanning alias file */ int space; /* if TRUE put space in space */ { struct token *tq; /* member pointer for building queue */ register char *fp; /* pointer to chars in field */ char *p; struct str str; register struct str *sp = &str; /* pointer to string building region */ enum e_state { /* state machine definitions */ s_domlit, /* inside a domain literal */ s_text, /* inside a text literal token */ s_quote, /* inside a quoted literal */ s_comment, /* inside a comment */ s_cquote, /* previous character was a \ */ s_space, /* skipping through white space */ s_newtok, /* finished a token, start a new one */ s_hash_comment, /* comment from '#' to a newline */ } state; enum e_state save_state; /* save state from before a \ */ int comment_level; /* embeddedness level in comments */ char *non_text_tokens; /* chars not in text literal tokens */ int text_offs; /* offset to text area in p */ /* * initialize state */ if (alias) { /* if parsing alias file, # is white space */ non_text_tokens = ":;<>][\",.!%@ \t\n#"; } else { /* otherwise, # is a token char */ non_text_tokens = ":;<>][\",.!%@ \t\n"; } /* initialize the dynamic string variables */ STR_INIT(sp); /* allocate space for initial token */ *ret_q = tq = (struct token *)xmalloc(sizeof(*tq)); /* begin by reading through white space */ state = s_space; /* * loop until we have reached the end of the string, * going through the 
state machine to build up tokens. */ for (fp = field; *fp != '\0'; fp++) { switch(state) { /* * initial state * * scan for the end of white space and when found set * state as appropriate to the next character. If the * next state is to be anything other than s_comment * or s_hash_comment, finish off the white-space associated * with the current token and begin the text associated * with the current token. * * entry state: s_newtok, s_comment * exit state: s_comment, s_quote, s_text, s_comment, s_domain, * s_hash_comment, s_newtok */ case s_space: if (alias && *fp == '#') { /* found a '#' comment, skip through to the end of the line */ state = s_hash_comment; } else { if (*fp == '(') { /* found a comment, scan through it next */ state = s_comment; comment_level = 1; /* comment finished when this is 0 */ if (space) { STR_NEXT(sp, *fp); } break; } else if (isspace(*fp)) { if (space) { STR_NEXT(sp, *fp); } break; /* continue with space token */ } else { if (space) { /* end of white space and comments preceding token */ STR_NEXT(sp, '\0'); /* end white space */ } /* * leave room for possible comma in the white space * also, if we are not putting white space in space, * this will at least make space valid */ STR_NEXT(sp, '\0'); } text_offs = sp->i; /* mark offset for token text in p */ /* determine what form this token will be */ switch (*fp) { case '[': /* a domain literal comes next */ state = s_domlit; tq->form = T_DOMLIT; break; case '"': /* a quoted literal comes next */ state = s_quote; tq->form = T_QUOTE; break; case '\\': /* text token with first char quoted */ state = s_cquote; save_state = s_text; /* state after \ */ tq->form = T_TEXT; break; default: if (alias && *fp == ':' && strncmpic(fp, ":include:", sizeof(":include:")-1) == 0) { p = fp + sizeof(":include:") - 1; while (isspace(*p) && *p != '\n') p++; if (*p && index(non_text_tokens, *p) == NULL) { str_ncat(sp, fp, p - fp); fp = p; state = s_text; tq->form = T_TEXT; break; } } if (index(non_text_tokens, 
*fp)) { state = s_newtok; tq->form = T_OPER; } else { state = s_text; tq->form = T_TEXT; } break; } STR_NEXT(sp, *fp); /* copy character into token */ } break; /* * a comment was begun with a '#' character and a newline * terminates it. This state is entered only when parsing * an alias file. * * entry state: s_space * exit state: s_space */ case s_hash_comment: if (*fp == '\n') { state = s_space; } break; /* * a domain literal was begun with a '[' character * and a ']' terminates it, however, a "\]" sequence * does not terminate a domain literal. * * entry state: s_space * exit state: s_newtok */ case s_domlit: STR_NEXT(sp, *fp); if (*fp == '\\') { /* \ quotes next character, save s_domlit state */ save_state = s_domlit; state = s_cquote; } else if (*fp == ']') { /* ] terminates a domain literal */ state = s_newtok; } break; /* * a text token was begun by a non white space character * which is not in the set "[!@%.\"" and ends with a white * space character or a character that is in that set. * a special character can be prefixed with \ to be included * in the text literal. * * entry state: s_space * exit state: s_newtok */ case s_text: if (*fp == '\\') { /* \ quotes next character, save s_text state */ STR_NEXT(sp, *fp); /* copy char into token */ save_state = s_text; state = s_cquote; } else if (index(non_text_tokens, *fp)) { /* space or an operator follows a text literal */ fp--; /* re-scan character */ state = s_newtok; } else { STR_NEXT(sp, *fp); /* copy char into token */ } break; /* * a quoted literal was begun by a " character and ends with * a " character. A \" sequence does not end a quoted literal. 
* * entry state: s_space * exit state: s_newtok */ case s_quote: STR_NEXT(sp, *fp); /* copy char into token */ if (*fp == '\\') { /* \ quotes next character, save s_quote state */ save_state = s_quote; state = s_cquote; } else if (*fp == '"') { /* " terminates a quoted literal */ state = s_newtok; } break; /* * a comment begins with a ( and ends when a balancing ) is * found. A \( or \) sequence does not count in determining * balancing of parentheses. * * entry state: s_space * exit state: s_space */ case s_comment: if (space) { STR_NEXT(sp, *fp); /* copy char into token */ } if (*fp == '\\') { /* \ quotes next character, save s_comment state */ save_state = s_comment; state = s_cquote; } else if (*fp == ')') { comment_level--; if (comment_level == 0) { /* balanced parentheses--done with comment */ state = s_space; } } else if (*fp == '(') { comment_level++; } break; /* * \ escape in quote, text literal, comment or domain * include the character following a \ in the token and * retain the previous state. * * entry state: s_quote, s_text, s_comment or s_domain * exit state: the entry state */ case s_cquote: STR_NEXT(sp, *fp); /* copy character into token */ /* restore previous state */ state = save_state; break; /* * finished up a complete token--set up for the next one. * this involes ending the dynamic string region * creating a new token and linking the previous token * before the new one. 
* * entry state: s_quote, s_text, s_comment, s_domain * exit state: s_space */ case s_newtok: /* finished a token, setup for next */ /* finish up dynamic string region */ STR_NEXT(sp, '\0'); STR_DONE(sp); /* create new token which is the current token's successor */ tq->succ = (struct token *)xmalloc(sizeof(*tq)); tq->space = sp->p; /* mark pointer to white space */ tq->text = sp->p + text_offs; /* mark pointer to token text */ tq = tq->succ; /* scan through white space next */ state = s_space; /* create a new dynamic string region */ STR_INIT(sp); fp--; /* re-read current character */ break; } } /* * we reached the end of the string. This is either okay, if we * are scanning white space or a text literal, or it is not okay. * if we are scanning white space or a comment, we need to close * off the white-space for the token, properly, otherwise we need * to close off the text associated with the token. */ if (state == s_hash_comment) { state = s_space; } if (state == s_space || state == s_comment) { /* no token text exists for last token, fill in with empty text */ if (space) { STR_NEXT(sp, '\0'); /* terminate space */ } STR_NEXT(sp, '\0'); /* leave room for a possible comma */ tq->text = ""; /* empty text */ STR_DONE(sp); } else { /* last token does contain some text */ STR_NEXT(sp, '\0'); STR_DONE(sp); tq->text = sp->p + text_offs; } tq->space = sp->p; /* first part of p is the white space and comments */ /* * if the current token is white space, then make it the ending * token in the generated list. Otherwise, allocate a new token * with no text or white-space and make that the ending token. * In the second case, check for errors as well. */ if (state == s_space) { tq->form = T_END; tq->succ = NULL; } else { struct token *end_q; end_q = tq->succ = (struct token *)xmalloc(sizeof(*end_q)); end_q->text = end_q->space = ""; end_q->form = T_END; end_q->succ = NULL; /* is it an error? 
*/ if (state == s_cquote) { return end_q->text = "no character after \\"; } /* what was the specific state for the error */ switch (state) { case s_domlit: /* unterminated domain literal */ tq->form = T_ERROR; return end_q->text = "unterminated domain literal"; case s_comment: tq->form = T_ERROR; return end_q->text = "unterminated comment"; case s_quote: tq->form = T_ERROR; return end_q->text = "unterminated quoted literal"; } } /* * everything went fine. ret_q is computed queue of tokens. * don't return an error message. */ return NULL; } /* * detokenize - convert a list of tokens into its string representation * * given a queue of tokens, such as produced by tokenize, return * a string corresponding to the space and text of the tokens. * * inputs: * buf - buffer in which to store result. NULL if we should * use the dynamic string region facility. If buf is * non-NULL it is assumed to be large enough to store * the result * tq_head - head of queue of tokens * tq_end - end of tokens to tokenize, or NULL to tokenize up * to an ENDTOK token * * output: * string representing list of tokens * * called by: finish_modified_clean, enqueue_address, external functions */ char * detokenize(space, buf, tq_head, tq_end) int space; /* TRUE if space should be copied */ char *buf; /* store result here, if non-NULL */ struct token *tq_head; /* list of tokens to detokenize */ struct token *tq_end; /* end of tokens to, or NULL */ { register struct token *tq; /* temp for scanning through tokens */ if (buf) { register char *bp= buf; /* point to buf */ bp[0] = '\0'; /* loop through contatenating space and text from tokens */ for (tq = tq_head; !ENDTOK(tq->form); tq = tq->succ) { if (space) { (void)strcat(bp, tq->space); } (void)strcat(bp, tq->text); if (tq == tq_end) { return bp; } } /* get the white space from the ending token */ (void)strcat(bp, tq->space); return bp; /* return the buffer */ } else { struct str str; register struct str *sp = &str; /* dynamic string region */ 
STR_INIT(sp); /* initialize dynamic string region */ for (tq = tq_head; !ENDTOK(tq->form); tq = tq->succ) { if (space) { STR_CAT(sp, tq->space); } STR_CAT(sp, tq->text); if (tq == tq_end) { STR_NEXT(sp, '\0'); /* null terminate */ STR_DONE(sp); /* finish dynamic string */ return sp->p; /* return string */ } } STR_CAT(sp, tq->space); /* add space from last token */ STR_NEXT(sp, '\0'); /* null terminate */ STR_DONE(sp); /* finish dynamic string */ return sp->p; /* return it */ } } /* * process_field - cleanly separate addresses in a header field, and extract * and cleanup those addresses * * given a header field which contains addresses, cleanly separate * each address with a comma, if it is not separated already. * Optionally clean local addresses by appending an RFC822 '@domain' form. * * Recognized addressing forms are: * * ANY*<ANY*> - route, can be recursive. * ANY*: - beginning of a group. * ;[@WORD] - end of a group. * WORD [op WORD [op ... WORD]] * - op is from the list ".!%@" * the simple form "WORD" is a local address. * * inputs: * field - a header field which contains addresses. If NULL * no header is returned. * fp - start of region to tokenize and clean. * domain - if non-NULL, a domain which is to be appended in * RFC822 '@domain' form to local addresses. * uucp_host - if non-NULL, a string to prepend to ! routes. * The purpose of this field is to keep ! routes in * From: or Sender: fields in ! route notation and to * ensure that the ! route will correctly return to * the sender, assuming software on other machines * doing something else. * extract_q - Address queue in which to insert extracted addresses. * NULL if we are not extracting addresses. * flags - A bitwise or of the following flags from field.h: * F_LOCAL - set if message originated on the local host. * This causes domains to be fully qualified. * F_STRICT - set to adhere more closely to RFC822. 
When * this is set, then all local addressing forms, * bang routes and tokens%domain forms are appended * with @domain, if domain is given, and prepended * with uucp_host, if uucp_host is given. This is * for use in gatewaying to stricter networks. * F_ALIAS - set to parse an aliases-style file. In these * cases, '#' introduces a comment and a the * string ":include:" is allowed at the start of * a text token, and does not introduce a group. * error - if an error occurs, an error message is stored here, * otherwise error is left alone. * * output: * a header cleaned according to the rules stated above, or NULL * if `field' was NULL. * * called by: external functions * calls: tokenize, match_route_or_group, match_group_term, match_general */ char * process_field(field, fp, domain, uucp_host, extract_q, flags, error) char *field; /* header field to be cleaned */ char *fp; /* pointer to field contents */ char *domain; /* domain to add to local addresses */ char *uucp_host; /* uucp host to prepend to ! 
routes */ struct addr **extract_q; /* queue in which to put addresses */ int flags; /* miscellaneous flags */ char **error; /* store error message here */ { int modified = FALSE; /* set to TRUE if field is modified */ char *error_message; /* error returned by tokenize */ struct token *tq_head; /* list of tokens to return */ struct token *tq_anchor; /* anchor point for pattern scan */ struct token *tq_new; /* new anchor found by pattern scan */ int new_group = FALSE; /* set if group: pattern newly found */ int group = FALSE; /* set when inside of a group */ unsigned len = 0; /* length of cleaned header, including the terminating nul */ int need_comma = FALSE; /* TRUE if we may need a , at anchor */ int check_route = TRUE; /* TRUE if we must scan for routes (never cleared in this function) */ int i; /* result code from the match_* routines */ if (field) { len = strlen(field) + 1; } DEBUG(DBG_FIELD_LO, "process_field: entry\n"); /* tokenize the contents to make parsing easy; white space and comments are only kept in the tokens when a field was given, since only then is a cleaned header rebuilt from them */ error_message = tokenize(fp, &tq_head, flags&F_ALIAS, field != NULL); DUMP_TOKENS(DBG_FIELD_HI, tq_head); /* * If tokenize found an error, then there is a syntax error * which would make processing this header of dubious value. * If we are not depending on the correctness of the header * for extracting addresses, this is not enough to warrant return * of mail. */ if (error_message) { *error = error_message; DEBUG1(DBG_FIELD_LO, "process_field: error: %s\n", error_message); return field; /* return field unmodified */ } /* * scan through until no more tokens are left * * starting at anchor points, find an addressing form that * matches a set of tokens starting at that anchor point. * If the addressing form needs to be separated from the previous * by a comma, and it is not currently so separated then * insert a comma in the white space before the anchor point token. 
*/ tq_anchor = tq_head; while (!ENDTOK(tq_anchor->form)) { tq_new = NULL; /* set when address pattern found */ if (check_route) { /* scan for: phrase <route-addr> or phrase : */ i = match_route_or_group(tq_anchor, &tq_new, extract_q, group, &len, domain, uucp_host, flags, error); switch (i) { case T_NOMATCH: /* didn't match route or group */ tq_new = NULL; break; case T_ROUTE: /* matched a route */ /* tq_new points to end of complete route form */ break; case T_GROUP: /* matched a group */ /* tq_new points to : at end of group */ group = TRUE; /* NOTE: next address does not need comma separator */ new_group = TRUE; break; case T_MODIFIED: /* matched and something modified */ modified = TRUE; break; default: /* error occurred */ DEBUG1(DBG_FIELD_LO, "process_field: error: %s\n", *error); return field; /* return the field unchanged */ } } if (!tq_new) { /* scan for group terminator: ;[@WORD] */ i = match_group_term(tq_anchor, &tq_new, extract_q, group, error); switch (i) { case T_NOMATCH: /* didn't match group terminator */ tq_new = NULL; break; case T_GROUPTERM: /* matched a group terminator */ /* tq_new points to end of complete group terminator */ need_comma = FALSE; /* never need a comma before this */ group = FALSE; /* not in a group anymore */ break; default: /* error occurred */ DEBUG1(DBG_FIELD_LO, "process_field: error: %s\n", *error); return field; /* return the field unchanged */ } } if (!tq_new) { /* * scan for: WORD [op WORD [op ... WORD]] * where op is from the set [.!%@] and the sequence * ends in a WORD. 
*/ i = match_general(tq_anchor, &tq_new, &len, extract_q, domain, uucp_host, flags, error); DEBUG1(DBG_FIELD_MID, "match_general returned %d\n", i); switch (i) { case T_NOMATCH: /* didn't match general address form */ tq_new = NULL; break; case T_GENERAL: /* matched a general address */ /* tq_new points to end of address */ DEBUG(DBG_FIELD_MID, "just match, no mods\n"); break; case T_MODIFIED: /* matched and changed in some way */ /* tq_new points to end of address */ modified = TRUE; /* modified in match_general */ break; case T_MUTANT_FORM: /* not allowed outside of a route */ *error = "mutant addressing form outside of route"; DEBUG1(DBG_FIELD_LO, "process_field: error: %s\n", *error); return field; /* return the field unchanged */ default: /* error occurred */ DEBUG1(DBG_FIELD_LO, "process_field: error: %s\n", *error); return field; /* return the field unchanged */ } } if (!tq_new) { /* we didn't find an addressing form that matched */ *error = "unknown addressing form"; DEBUG1(DBG_FIELD_LO, "process_field: error: %s\n", *error); return field; } if (need_comma && field != NULL) { /* there is an address and previous address needs a comma; only done when a field was given, because only then did tokenize store white space (and room for the comma) in the tokens */ insert_comma(tq_anchor->space); modified = TRUE; /* field has been modified */ len++; /* 1 character inserted */ } /* * set state for next pass through the loop */ if (new_group) { /* * if a group was found, the next address should not be * preceded by a comma and the next token is the token * immediately following the : */ need_comma = FALSE; new_group = FALSE; tq_anchor = tq_new->succ; } else { if (tq_new->succ->text[0] == ',') { /* * if the next token is a comma, then we will not need to * insert one before the next address. * The next token is the one after the ',' */ need_comma = FALSE; /* skip the comma */ tq_anchor = tq_new->succ->succ; } else { /* * not a new group, and next token not a comma, we * may need to insert a comma before the next address * The next token is the one after the end of the previous * match. 
*/ need_comma = TRUE; tq_anchor = tq_new->succ; } } } if (modified && field != NULL) { /* copy finished results into buffer for returning to caller; fp-field is the number of chars of field that precede the tokenized region */ field = finish_mod_clean(field, (unsigned)(fp-field), tq_head, len); } DEBUG1(DBG_FIELD_LO, "process_field: return %s\n", field); return field; /* all done, return the header */ } /* * finish_mod_clean - return string for field name token list * * Finish process_field for the case that the header field was modified * by copying the field name and the tokens into a string area and * returning a pointer to the string. * * inputs: * field_name - string to copy to beginning of buffer * name_len - number of chars to copy from field_name * tq_head - token queue to copy into buffer * len - computed total length of result; this many bytes are * allocated, so it must cover name_len, the detokenized * tokens and the terminating nul * * output: * pointer to string representing completed header field * * called by: process_field * calls: detokenize */ static char * finish_mod_clean(field_name, name_len, tq_head, len) char *field_name; /* field name string */ unsigned name_len; /* length of field name */ struct token *tq_head; /* head of list to convert to string */ unsigned len; /* computed length of result */ { register char *p = xmalloc(len); /* where to store result */ DEBUG(DBG_FIELD_HI, "field was modified--build string for return\n"); DEBUG1(DBG_FIELD_HI, "field = %s\n", field_name); /* copy field name up to colon; no nul is written here, detokenize below starts by storing one at p+name_len */ (void)memcpy(p, field_name, name_len); /* * copy space and text from queued tokens; the value returned by * detokenize (p+name_len) is not needed, p is the complete field */ (void)detokenize(TRUE, p+name_len, tq_head, (struct token *)NULL); DEBUG1(DBG_FIELD_HI, "completed string: %s\n", p); return p; } /* * insert_comma - insert a comma after a comment or at beginning of string * * Given the space field from a token, insert a ',' character either * after the last comment (if one exists) or at the beginning of the * string, if no comment exists in the string. 
*
 * input:
 *      s       - string in which to insert a comma.  The string is
 *                lengthened by one character in place, so the buffer
 *                must have one spare byte after the terminating nul.
 *
 * outputs:
 *      none (s is modified in place)
 *
 * called by: process_field
 */
static void
insert_comma(s)
    char *s;
{
    char *last_paren = rindex(s, ')');  /* last ')' in s, if any */
    char *slot;                         /* where the comma will go */
    char *cur;                          /* byte currently being filled */

    /* the comma goes just past the last comment, else at the very start */
    slot = last_paren ? last_paren + 1 : s;

    /*
     * shift everything from slot through the terminating nul up by one
     * byte, working backwards from one past the nul so nothing is
     * overwritten before it has been moved
     */
    cur = slot + strlen(slot) + 1;
    while (cur != slot) {
        *cur = *(cur - 1);
        cur--;
    }

    *slot = ',';                        /* insert the comma */
}

/*
 * match_route_or_group - reduce on a route or group form if possible
 *
 * This function is called by process_field to determine if the current
 * anchor point is the beginning of a route or a group.  If so the
 * route or group is processed and the end of the route or group is
 * returned.
 *
 * inputs:
 *      tq_anchor - the anchor point from process_field.
 *      tq_new  - pointer to variable in which to return the end
 *                of the matched form.
 *      extract_q - Address queue in which to insert extracted addresses.
 *                NULL if we are not extracting addresses.
 *      group   - TRUE if a matched group would be recursive,
 *                this is specifically an RFC822 no-no and is likely
 *                to mean that an unsupported addressing form has
 *                been used.
 *      domain  - if non-NULL, a domain which is to be appended in
 *                RFC822 '@domain' form to local addresses.
 *      uucp_host - if non-NULL, a string to prepend to ! routes.
 *                The purpose of this field is to keep ! routes in
 *                From: or Sender: fields in ! route notation and to
 *                ensure that the ! route will correctly return to
 *                the sender, assuming software on other machines
 *                doing something else.
 *      flags   - A bitwise or of the following flags from field.h:
 *                F_LOCAL - set if message originated on the local host.
 *                          This causes domains to be fully qualified.
 *                F_STRICT - set to adhere more closely to RFC822.
When * this is set, then all local addressing forms, * bang routes and tokens%domain forms are appended * with @domain, if domain is given, and prepended * with uucp_host, if uucp_host is given. This is * for use in gatewaying to stricter networks. * F_ALIAS - set to parse an aliases-style file. In these * cases, '#' introduces a comment and a the * string ":include:" is allowed at the start of * a text token, and does not introduce a group. * error - store an error message here if an error occurs. * * output: * T_NOMATCH if not matched, T_ROUTE if matched a route, * T_GROUP if matched a group, * T_MODIFIED if general addressing form which modified field, * FAIL if error. * * called by: process_field * calls: match_general, enqueue_address */ static int match_route_or_group(tq_anchor, tq_new, extract_q, group, len, domain, uucp_host, flags, error) struct token *tq_anchor; /* anchor point from process_field */ struct token **tq_new; /* return last matched token here */ struct addr **extract_q; /* queue in which to put addresses */ unsigned *len; /* len variable from process_field */ int group; /* TRUE if group would be recursive */ char *domain; /* domain to add to local addresses */ char *uucp_host; /* uucp host to prepend to ! routes */ int flags; /* miscellaneous flags */ char **error; /* store error message here */ { register struct token *tq; /* temp for scanning tokens */ int recursion_level = 1; /* embeddedness of route */ struct token *tq_start; /* start of innermost route */ struct token *tq_end; /* end of innermost route */ struct token *tq_temp; /* temp */ int seek_end; /* we are scanning for end of route */ int i; /* temp */ *tq_new = NULL; /* nothing yet */ /* * scan through tokens until we know what we have: * a route, a group or something else. 
*/ tq = tq_anchor; for (;;) { if (tq->text[0] == ',' || ENDTOK(tq->form)) { /* we have something else nothing left to do here */ return T_NOMATCH; /* we didn't match anything */ } if (tq->text[0] == '<') { /* we have a route */ DEBUG(DBG_FIELD_MID, "We have a route\n"); *tq_new = tq; break; } if (tq->text[0] == ':' && tq->form == T_OPER) { /* we have a group */ DEBUG(DBG_FIELD_MID, "We have a group\n"); *tq_new = tq; if (group) { /* catch recursive groups */ *error = "recursive address group"; return FAIL; } return T_GROUP; /* signal that we have a group */ } tq = tq->succ; /* get next token */ } /* * we have a route, search for end point of route * and note the tokens in the innermost recursion * level so that we can extract them as an address. */ tq_start = (*tq_new)->succ; seek_end = 1; /* assume we are innermost for now */ /* allow recursion because it happens sometimes */ for (tq = (*tq_new)->succ; !ENDTOK(tq->form); tq = tq->succ) { if (tq->text[0] == '<') { recursion_level++; DEBUG(DBG_FIELD_HI, "bump up recursion level on route\n"); /* at more deeply nested address, forget what we had before */ tq_start = tq->succ; tq_end = NULL; seek_end = 1; } else if (tq->text[0] == '>') { recursion_level--; DEBUG(DBG_FIELD_HI, "bump down recursion level on route\n"); seek_end = 0; /* the end, if no more < tokens */ if (recursion_level == 0) { break; } } else if (seek_end) { tq_end = tq; /* could be the end of the address */ } } *tq_new = tq; /* end of matched route */ if (recursion_level) { *error = "unterminated route"; return FAIL; /* signal an error */ } if (tq_end == NULL) { *error = "null route"; return FAIL; /* signal an error */ } /* * route may match a general WORD op WORD op ... 
WORD form */ i = match_general(tq_start, &tq_temp, len, extract_q, domain, uucp_host, flags, error); switch (i) { case T_NOMATCH: break; /* didn't match */ case T_MUTANT_FORM: break; /* mutant form allowed in route */ case T_GENERAL: /* match, didn't modify anything */ if (tq_temp != tq_end) { /* not a complete match--this is a problem */ *error = "syntax error in address"; return FAIL; } return T_ROUTE; /* matched route, nothing modified */ case T_MODIFIED: /* match and something was modified */ return T_MODIFIED; default: /* an error occured */ return FAIL; /* propogate the error */ } if (extract_q) { if (enqueue_address(extract_q, tq_start, tq_end, error) == FAIL) { /* enqueue_address returned error, specific error already logged */ return FAIL; /* signal an error */ } } return T_ROUTE; /* signal a route */ } /* * match_group_term - match a group terminator pattern (;[@TOKEN]). * * Called from check_field to determine if the tokens after the * anchor point match a group terminator pattern (a semicolon optionally * followed by the pattern @WORD. * * inputs: * tq_anchor - the anchor point from process_field. * tq_new - pointer to variable in which to return the end * of the matched form. * extract_q - Address queue in which to insert extracted addresses. * NULL if we are not extracting addresses. * group - TRUE if we are now in a group. If this is not * the case then a match on a group terminator would * be an error. * error - store an error message here, on errors. * * output: * T_NOMATCH if not matched, T_GROUPTERM if match, FAIL on error. 
*/
/*ARGSUSED*/
static int
match_group_term(tq_anchor, tq_new, extract_q, group, error)
    struct token *tq_anchor;            /* anchor point from process_field */
    struct token **tq_new;              /* return last matched token here */
    struct addr **extract_q;            /* queue in which to add addresses */
    int group;                          /* TRUE if we are processing a group */
    char **error;                       /* store error message here */
{
    struct token *word;                 /* the WORD of an optional @WORD */

    /* anything other than a leading ; is not a group terminator */
    if (tq_anchor->text[0] != ';') {
        return T_NOMATCH;               /* no match; *tq_new is left alone */
    }

    /* a ; with no group : form open is not correct */
    if (!group) {
        *error = "\";\" does not terminate a group";
        return FAIL;                    /* signal an error */
    }

    /* plain terminator: the match ends at the ; itself */
    if (tq_anchor->succ->text[0] != '@') {
        DEBUG(DBG_FIELD_MID, "simple group terminator\n");
        *tq_new = tq_anchor;
        return T_GROUPTERM;             /* match */
    }

    /*
     * optional @WORD given: the match ends at the token after the @,
     * which is reported through tq_new even when it is not a WORD
     */
    word = tq_anchor->succ->succ;
    *tq_new = word;
    if (!WORDTOK(word->form)) {
        *error = "syntax error in address";
        return FAIL;                    /* signal an error */
    }
    DEBUG(DBG_FIELD_MID, "group terminator of form ;@WORD\n");
    return T_GROUPTERM;                 /* match */
}

/*
 * match_general - match a general address form WORD [op WORD [op ... WORD]]
 *
 * Called from process_field and match_route_or_group to determine if
 * the tokens after the anchor point match a general address form, which
 * is a sequence of WORD tokens separated by operators from the set ".!%@".
 *
 * If domain is given and we have an address which is just WORD, then
 * append @domain to the address.
 *
 * If uucp_host is given and we have a bang route, then prepend
 * uucp_host! to the address.
 *
 * If local is TRUE and address is WORD*@WORD1 or WORD*%WORD1 then
 * have the domain WORD1 fully qualified if possible.
 *
 * inputs:
 *      tq_anchor - the anchor point from process_field.
*      tq_new    - pointer to variable in which to return the end
 *                  of the matched form.
 *      len       - len variable from process_field.  This routine may
 *                  modify an address.  If so, the len variable is
 *                  modified to take into account the change in length
 *                  of the header field.
 *      extract_q - Address queue in which to insert extracted addresses.
 *                  NULL if we are not extracting addresses.
 *      domain    - if non-NULL, a domain which is to be appended in
 *                  RFC822 '@domain' form to local addresses.
 *      uucp_host - if non-NULL, a string to prepend to ! routes.
 *                  The purpose of this field is to keep ! routes in
 *                  From: or Sender: fields in ! route notation and to
 *                  ensure that the ! route will correctly return to
 *                  the sender, assuming software on other machines
 *                  doing something else.
 *      flags     - A bitwise or of the following flags from field.h:
 *                  F_LOCAL  - set if message originated on the local host.
 *                             This causes domains to be fully qualified.
 *                  F_STRICT - set to adhere more closely to RFC822.  When
 *                             this is set, then all local addressing forms,
 *                             bang routes and tokens%domain forms are appended
 *                             with @domain, if domain is given, and prepended
 *                             with uucp_host, if uucp_host is given.  This is
 *                             for use in gatewaying to stricter networks.
 *                  F_ALIAS  - set to parse an aliases-style file.  In these
 *                             cases, '#' introduces a comment and the
 *                             string ":include:" is allowed at the start of
 *                             a text token, and does not introduce a group.
 *      error     - store any error message here.
 *
 * output:
 *      T_NOMATCH if no match found, T_GENERAL if match and unmodified,
 *      T_MODIFIED if @domain appended, uucp_host! prepended, or domain
 *      qualified, T_MUTANT_FORM if a pure bang route is followed by
 *      "!@" (the host!(host!)*@route form), FAIL on error.
*
 * called by: process_field
 * calls: queue_qualify_domain, enqueue_address
 */
static int
match_general(tq_anchor, tq_new, len, extract_q, domain, uucp_host,
              flags, error)
    struct token *tq_anchor;            /* anchor point from process_field */
    struct token **tq_new;              /* return last matched token here */
    unsigned *len;                      /* len variable from process_field */
    struct addr **extract_q;            /* queue in which to add addresses */
    char *domain;                       /* domain to add to local addresses */
    char *uucp_host;                    /* uucp host to prepend to ! routes */
    int flags;                          /* miscellaneous flags */
    char **error;                       /* store error message here */
{
    register struct token *tq;          /* temp for scanning token list */
    register struct token *tq_temp;     /* successor of tq while skipping */
    int bang_route = FALSE;             /* TRUE if bang route */
    int pure_bang_route = TRUE;         /* TRUE if pure bang route */
    int domain_address = FALSE;         /* TRUE if domain address */
    int at_found = FALSE;               /* TRUE if @ token found */
    int ret_val = T_GENERAL;            /* value to be returned */
    struct token *tq_mark = NULL;       /* mark primary domain */

    tq = tq_anchor;                     /* load anchor into a register */

    if (!WORDTOK(tq->form) && tq->text[0] != '.') {
        /* it doesn't begin with WORD token */
        return T_NOMATCH;               /* signal no match */
    }

    /* some part of the remaining tokens matches the form */

    for (;;) {
        /*
         * skip a collection of zero or more WORD tokens delimited
         * by one or more "." tokens
         */
        for (;;) {
            tq_temp = tq->succ;
            if (tq->text[0] == '.' && tq_temp->text[0] == '.') {
                tq = tq_temp;
                continue;
            }
            if ((WORDTOK(tq->form) || WORDTOK(tq_temp->form)) &&
                (tq->text[0] == '.' || tq_temp->text[0] == '.'))
            {
                tq = tq_temp;
                continue;
            }
            break;
        }

        /*
         * keep going only while an operator from "!%@" follows and is
         * itself followed by a WORD or "." token
         */
        if (ENDTOK(tq->succ->form) ||
            !index("!%@", tq->succ->text[0]) ||
            !(WORDTOK(tq->succ->succ->form) ||
              tq->succ->succ->text[0] == '.'))
        {
            break;
        }

        switch (tq->succ->text[0]) {
        case '!':                       /* take first host in ! route */
            bang_route = TRUE;
            break;

        case '%':                       /* alternately, last % host */
            if (!bang_route) {
                tq_mark = tq->succ->succ;
                domain_address = TRUE;
            }
            pure_bang_route = FALSE;
            break;

        case '@':                       /* always take last @ host */
            tq_mark = tq->succ->succ;
            domain_address = TRUE;
            at_found = TRUE;
            pure_bang_route = FALSE;
            break;
        }
        tq = tq->succ->succ;            /* step over the operator */
    }

    /* do we match host!(host!)*@route ? */
    if (pure_bang_route &&
        tq->succ->text[0] == '!' && tq->succ->succ->text[0] == '@')
    {
        return T_MUTANT_FORM;           /* mutant form allowed for route */
    }

    DEBUG(DBG_FIELD_HI, "found a WORD op WORD op ... WORD sequence\n");
    *tq_new = tq;                       /* at end of sequence */

    /*
     * qualify domain by appending qualifier to it, if needed
     *
     * If we have a WORD*@WORD1 or a WORD*%WORD1 form, qualify
     * the domain WORD1, if necessary by appending a qualifier
     * to the domain.  len is updated to reflect length change.
     */
    if (domain_address && (flags & F_LOCAL)) {
        char *s = queue_qualify_domain(tq_mark, tq);

        if (s) {
            /* append .s */
            (*tq_new)->text = xprintf("%s.%s", (*tq_new)->text, s);
            /* field length increased */
            *len += 1 + strlen(s);
            ret_val = T_MODIFIED;
        }
    }

    if (!tq_mark && uucp_host && (bang_route || (flags & F_STRICT))) {
        /*
         * we have a bang route, prepend uucp_host! to the route.
         * Also prepend uucp_host! to the route if we are doing
         * strict RFC822.  In this case an address will be
         * prepended with the route back to the sender and
         * appended with the current domain.
         */
        tq_anchor->text = xprintf("%s!%s", uucp_host, tq_anchor->text);
        /* field length increased */
        *len += 1 + strlen(uucp_host);
        ret_val = T_MODIFIED;
    }

    /*
     * if we want strict addresses and we don't have one, or if
     * we have a local address but we don't want one (for a local
     * message only), then append @domain.
     *
     * Nothing can be appended when no domain was supplied, so the
     * strict case is guarded by domain as well; otherwise a NULL
     * domain would be passed to xprintf() and strlen().
     */
    if (domain &&
        (((flags & F_STRICT) && !at_found) ||
         ((flags & F_LOCAL) &&
          !(domain_address || bang_route || at_found))))
    {
        (*tq_new)->text = xprintf("%s@%s", (*tq_new)->text, domain);
        /* field length increased */
        *len += 1 + strlen(domain);
        ret_val = T_MODIFIED;
    }

    if (extract_q) {
        /*
         * have the address added to the extraction queue; a FAIL
         * return must be propagated (a "< FAIL" comparison can never
         * be true for a FAIL return value)
         */
        if (enqueue_address(extract_q, tq_anchor, *tq_new, error) == FAIL) {
            /* enqueue_address stored a specific message in *error */
            return FAIL;                /* signal an error */
        }
        DEBUG(DBG_FIELD_MID, "address enqueued\n");
    }

    return ret_val;
}

/*
 * queue_qualify_domain - untokenize a domain and call qualify_domain
 *
 * Called from match_general, this routine takes a token list
 * representing a domain, converts it back to a string and calls
 * qualify_domain() to determine if any text needs to be appended
 * in order to make the domain fully qualified.
 *
 * inputs:
 *      tq_start  - first token in domain
 *      tq_end    - last token in domain
 *
 * output:
 *      NULL if nothing should be appended to the domain,
 *      otherwise a string which represents the complete super
 *      domain that the given domain should be qualified in.
*
 * called by: match_general
 * calls: qualify_domain(external)
 */
static char *
queue_qualify_domain(tq_start, tq_end)
    struct token *tq_start;             /* beginning of domain reference */
    struct token *tq_end;               /* end of domain reference */
{
    struct str region;                  /* dynamic string region */
    struct str *dp = &region;           /* handle passed to the STR_ macros */
    struct token *cur;                  /* token currently being appended */
    char *qualifier;                    /* result of qualify_domain() */

    STR_INIT(dp);                       /* initialize dynamic string region */

    /*
     * concatenate the text of every token from tq_start through tq_end,
     * stopping early if the token list runs out first
     */
    for (cur = tq_start; ; cur = cur->succ) {
        STR_CAT(dp, cur->text);
        if (cur == tq_end || cur->succ == NULL) {
            break;
        }
    }
    STR_NEXT(dp, '\0');                 /* nul-terminate the domain string */

    /* send out for the actual qualification */
    qualifier = qualify_domain(dp->p);
    DEBUG2(200, "qualify_domain(%s) returns %s\n",
           dp->p, qualifier? qualifier: "(null)");

    STR_FREE(dp);                       /* free region */

    return qualifier;                   /* hand back qualify_domain()'s answer */
}

/*
 * enqueue_address - insert a new address into a queue
 *
 * Given a token list representing an address, detokenize the list
 * and add it to the given address queue.
*
 * inputs:
 *      q         - pointer to queue of addresses
 *      tq_start  - first token in the address
 *      tq_end    - ending token of the address
 *      error     - store any error message here
 *
 * outputs:
 *      SUCCEED if everything went okay, FAIL on error.  On FAIL the
 *      new entry is not linked into *q and *error is set to a message
 *      of the form "address: reason".
 *
 * called by: match_route_or_group, match_general
 * calls: detokenize, alloc_addr, preparse_address, xprintf
 */
static int
enqueue_address(q, tq_start, tq_end, error)
    struct addr **q;                    /* queue in which to insert */
    struct token *tq_start;             /* first token in the address */
    struct token *tq_end;               /* ending token in the address */
    char **error;                       /* store error message here */
{
    register char *s;                   /* string representing the address */
    register struct addr *temp_q;       /* temp */
    char *parse_error;                  /* error from preparse_address() */

    /* grab the string corresponding to the tokens */
    s = detokenize(FALSE, (char *)NULL, tq_start, tq_end);
    DEBUG1(DBG_FIELD_LO, "enqueue_address(%s)\n", s);

    /* build the new queue entry, pointing at the current head */
    temp_q = alloc_addr();              /* get an address queue entry */
    temp_q->succ = *q;
    temp_q->in_addr = s;

    /* work_addr gets a mungeable copy */
    if ((temp_q->work_addr = preparse_address(s, &parse_error)) == NULL) {
        /*
         * report the message produced by preparse_address(); passing
         * `error' itself (a char **) to a %s conversion was a bug
         */
        *error = xprintf("%s: %s", s, parse_error);
        return FAIL;
    }

    *q = temp_q;                        /* insert at beginning of list */
    return SUCCEED;                     /* added to the list */
}

/*
 * dump_tokens - list tokens to standard error for debugging purposes
 *
 * called from the DUMP_TOKENS macro, this function generates
 * a verbose description of what is going on with a list of tokens.
* * input: * tq - head of a queue of tokens * * outputs: * none * * called by: DUMP_TOKENS(local macro) */ void dump_tokens(tq) register struct token *tq; /* dump these tokens on errfile */ { (void)fprintf(errfile, "token list:\n"); while (tq) { register char *s; char buf[100+1]; switch(tq->form) { case T_QUOTE: s = "T_QUOTE"; break; case T_DOMLIT: s = "T_DOMLIT"; break; case T_OPER: s = "T_OPER"; break; case T_TEXT: s = "T_TEXT"; break; case T_END: s = "T_END"; break; case T_ERROR: s = "T_ERROR"; break; default: (void)sprintf(s = buf, "form=%d", tq->form); break; } (void)fprintf(errfile, "\t|%s|%s|%s|\n", tq->space, tq->text, s); tq = tq->succ; } (void)fprintf(errfile, "end of list\n"); } #ifdef STANDALONE #include "varargs.h" int send_to_postmaster = FALSE; /* see if this gets set */ int return_to_sender = FALSE; /* see if this gets set */ struct addr *recipients = NULL; /* initial list here is zero */ char **args_recipients = {0}; /* nothing in this list */ int exitvalue = 0; FILE *errfile = stderr; char *primary_name = NULL; char *program = "field"; int compile_num = 999; extern int getopt(); extern char *optarg; extern int optind; #ifdef DEBUG_LEVEL int debug = DEBUG_LEVEL; #else /* DEBUG_LEVEL */ int debug = 0; #endif /* DEBUG_LEVEL */ /* * test the above functions by calling process_field for each * argument given to the program. 
*/
void
main(argc, argv)
    int argc;                           /* count of arguments */
    char **argv;                        /* vector of arguments */
{
    char *s;                            /* return value from process_field */
    struct addr *q;                     /* temp for scanning recipients */
    char *error;                        /* error message from process_field */
    char *domain = NULL;                /* -d: domain passed to process_field */
    char *uucp_host = NULL;             /* -u: uucp host passed to process_field */
    int flags = 0;                      /* -l, -s, -a: F_LOCAL|F_STRICT|F_ALIAS */
    int c;                              /* option letter from getopt */

    while ((c = getopt(argc, argv, "v:p:d:u:lsaD:")) != EOF) {
        switch (c) {
        case 'v':
            visible_name = optarg;
            break;

        case 'p':
            primary_name = optarg;
            break;

        case 'd':
            domain = optarg;
            break;

        case 'u':
            uucp_host = optarg;
            break;

        case 'l':
            flags |= F_LOCAL;
            break;

        case 's':
            flags |= F_STRICT;
            break;

        case 'a':
            flags |= F_ALIAS;
            break;

        case 'D':
            debug = atoi(optarg);
            break;
        }
    }
    argc -= optind;
    argv += optind;

    if (argc > 0) {
        /*
         * loop over all arguments
         */
        while (*argv) {
            (void)fprintf(stderr, "input:  %s\n", *argv);
            /* the field body starts after the first ':', if there is one */
            s = index(*argv, ':');
            if (s) {
                s++;
            } else {
                s = *argv;
            }

            /* process using the flags selected on the command line */
            error = NULL;
            s = process_field(*argv, s, domain, uucp_host,
                              &recipients, flags, &error);
            if (error) {
                (void) fprintf(stderr, "error: %s\n", error);
            } else {
                (void)fprintf(stderr, "output: %s\n", s? s: "(null)");
            }
            argv++;
        }
    } else {
        char line[4096];

        /*
         * no arguments: read fields from standard input, one per line.
         * fgets() bounds the read to the buffer; gets() could overflow
         * it.  The trailing newline that fgets() keeps is removed so
         * that the field text is the same as gets() would have given.
         */
        while (fgets(line, sizeof(line), stdin) != NULL) {
            char *nl = index(line, '\n');

            if (nl) {
                *nl = '\0';
            }
            (void)fprintf(stderr, "input:  %s\n", line);
            /* the field body starts after the first ':', if there is one */
            s = index(line, ':');
            if (s) {
                s++;
            } else {
                s = line;
            }

            /* process using the flags selected on the command line */
            error = NULL;
            s = process_field(line, s, domain, uucp_host,
                              &recipients, flags, &error);
            if (error) {
                (void) fprintf(stderr, "error: %s\n", error);
            } else {
                (void)fprintf(stderr, "output: %s\n", s? s: "(null)");
            }
        }
    }

    /* list every address that was extracted into recipients */
    for (q = recipients; q; q = q->succ) {
        (void)printf("%s\n", q->in_addr);
    }

    exit(exitvalue);
}

/*
 * define panic, fatal and write_log here, rather than
 * using the external routines.  We are testing and just want
 * the information displayed, not logged.
*/ /*VARARGS2*/ void panic(exitcode, fmt, va_alist) int exitcode; /* call exit(exitcode) */ char *fmt; /* printf(3) format */ va_dcl /* arguments for printf */ { va_list ap; va_start(ap); (void)fprintf(stderr, "PANIC(%s): ", exitcode); (void)vfprintf(stderr, fmt, ap); putc('\n', stderr); /* fatal messages not \n terminated */ va_end(ap); return_to_sender = TRUE; exit(exitcode); } /*VARARGS2*/ void write_log(log, fmt, va_alist) int log; /* TRUE if to write global log file */ char *fmt; /* printf(3) format */ va_dcl /* arguments for printf */ { va_list ap; va_start(ap); (void)fprintf(stderr, log? "PUBLIC: ": "PRIVATE: "); (void)vfprintf(stderr, fmt, ap); putc('\n', stderr); va_end(ap); } #endif /* STANDALONE */