home *** CD-ROM | disk | FTP | other *** search
- /*++
- /* NAME
- /* ms_parse 3
- /* SUMMARY
- /* message parser
- /* PROJECT
- /* pc-mail
- /* PACKAGE
- /* mailsh
- /* SYNOPSIS
- /* #include "ms_parse.h"
- /*
- /* int ms_parse(context, line)
- /* int context;
- /* char *line;
- /*
- /* int hscanf(line, prefix, format, result)
- /* char *line;
- /* char *prefix;
- /* char *format;
- /* char *result;
- /* DESCRIPTION
- /* The routines in this module recognize
- /* the context in which successive lines of text occur within an
- /* e-mail message, or extract specific information from header
- /* lines.
- /*
- /* The expected format of an e-mail message is: UUCP header lines,
- /* RFC822-like header lines, message body. Each of these sections
- /* may be missing from the message. A header line is a line that
- /* has no blanks before the first colon appearing on that line.
- /*
- /* ms_parse() determines the context in which a line of text was found:
- /*
- /* .nf
- MS_UUCP UUCP-style From_ line
- MS_HEADER RFC822-like header line
- MS_CONT Continued header line
- MS_BODY Line within message body
- /* .fi
- /*
- /* During the first call of ms_parse(), the context argument should have
- /* the value MS_UUCP. Upon successive calls the value should be equal
- /* to the last value returned by ms_parse(). The algorithm is transparent
- /* to other context values (i.e. they cause no transitions).
- /*
- /* hscanf() compares the beginning of a line with the specified prefix
- /* (ignoring case differences), and if the comparison succeeds, it
- /* invokes sscanf() on the remainder of that line. A zero return value
- /* means that no information was extracted with sscanf.
- /* AUTHOR(S)
- /* W.Z. Venema
- /* Eindhoven University of Technology
- /* Department of Mathematics and Computer Science
- /* Den Dolech 2, P.O. Box 513, 5600 MB Eindhoven, The Netherlands
- /* CREATION DATE
- /* Sat Dec 9 18:50:35 MET 1989
- /* LAST MODIFICATION
- /* 90/01/22 13:02:12
- /* VERSION/RELEASE
- /* 2.1
- /*--*/
-
- #include <stdio.h>
- #include <ctype.h>
-
- #include "defs.h"
- #include "ms_parse.h"
-
- /* forward declarations: isheader() is called by ms_parse() before it is defined */
-
- hidden int isheader();
-
- /* hscanf - match header and extract info from remainder of header line */
-
- public int hscanf(line, pre, fmt, ptr)
- char *line;
- char *pre;
- char *fmt;
- char *ptr;
- {
- int len = strlen(pre);
-
- return (istrncmp(pre, line, len) == 0 && sscanf(line + len, fmt, ptr) == 1);
- }
-
- /* ms_parse - parse one message line */
-
- public int ms_parse(context, line)
- register int context;
- register char *line;
- {
-
- /*
- * A message may begin with UUCP header lines ("From blablabla",
- * sometimes escaped with a ">" character), followed by RFC822-like
- * header lines (lines that start with a word + colon, or continuation
- * lines that start with whitespace), followed by the remainder of the
- * message. Header and body are usually separated by an empty line (on
- * systems that can handle that) but the we do not require this.
- */
-
- switch (context) {
- case MS_UUCP:
- if (line[0] == '>' || strncmp(line, "From ", 5) == 0)
- return (MS_UUCP);
- if (isspace(line[0]))
- return (MS_BODY);
- /* FALLTHROUGH */
- case MS_HEADER:
- case MS_CONT:
- if (isspace(line[0]))
- return (MS_CONT);
- if (isheader(line))
- return (MS_HEADER);
- /* FALLTHROUGH */
- case MS_BODY:
- return (MS_BODY);
- }
- /* NOTREACHED */
- }
-
- /* isheader - does this line look like a header? */
-
- hidden int isheader(buf)
- char *buf;
- {
- static char blanks[] = " \t\f";
- char *cp;
- char *blk;
- char *colon;
-
- /*
- * A header line has no blanks before the first colon. Which means that a
- * line that starts with a colon character is treated as header line.
- * This turns out to be what many sendmail implementations do, too.
- */
-
- if ((colon = index(buf, ':')) == 0) { /* check for colon */
- return (0);
- } else { /* find preceding blanks */
- for (cp = blanks; *cp; cp++)
- if ((blk = index(buf, *cp)) != 0 && blk < colon)
- return (0);
- }
- return (1);
- }
-