/*
 * (c) Copyright 1990, Kim Fabricius Storm.  All rights reserved.
 *
 * Digest article handling
 */
-
- #include "config.h"
- #include "news.h"
- #include "debug.h"
-
/*
 * TEST(fmt, x, y) -- debug trace helper.
 *
 * When DG_TEST is defined at compile time, fmt is printed with the two
 * arguments x and y through printf, but only while the DG_TEST bit is
 * set in Debug.  Otherwise the macro does nothing and its arguments are
 * not evaluated.
 *
 * Both variants are wrapped in do { ... } while (0) so that "TEST(...);"
 * is always exactly one statement; the bare "if" form would silently
 * capture an "else" that follows the macro call.
 */

#ifdef DG_TEST

#define TEST(fmt, x, y) \
    do { if (Debug & DG_TEST) printf(fmt, x, y); } while (0)

#else

#define TEST(fmt, x, y) do { } while (0)

#endif
-
/*
 * UNIFY (octal 040) is the bit in which an ASCII upper case letter
 * differs from its lower case form; OR-ing it into a letter gives the
 * lower case letter.
 */
#define UNIFY 040

/* the characters expected after an initial 'd' or 'D' */
static char digest_pattern[] = "igest";

/*
 * OR the UNIFY bit into every character of digest_pattern, so that the
 * pattern can be compared directly against subject characters that have
 * had UNIFY OR-ed in as well.
 *
 * Always returns 0.  (The function used to have an implicit int return
 * type and no return statement at all.)
 */
int
init_digest_parsing()
{
    register char *m;

    for (m = digest_pattern; *m != '\0'; m++)
        *m |= UNIFY;

    return 0;
}
-
-
- is_digest()
- {
- register char *subject;
- register char c, *q, *m;
-
- if ((subject = news.ng_subj) == NULL) return 0;
-
- while (c = *subject++) {
- if ((c | UNIFY) != ('d' | UNIFY)) continue;
-
- q = subject; m = digest_pattern;
- while ((c = *m++) && (*q++ | UNIFY) == c);
- if (c == NUL) return 1;
- }
- return 0;
- }
-
-
- /*
- * expect that f is positioned at header of an article
- */
-
- static int is_mmdf_folder = 0;
-
- get_digest_article(f, hdrbuf)
- FILE *f;
- news_header_buffer hdrbuf;
- {
- int cont;
-
- digest.dg_hpos = ftell(f);
- TEST("GET DIGEST hp=%ld\n", digest.dg_hpos, 0);
-
- do {
- if (!parse_digest_header(f, 0, hdrbuf)) return -1;
- digest.dg_fpos = ftell(f);
- TEST("END HEADER hp=%ld fp=%ld\n", digest.dg_hpos, digest.dg_fpos);
- } while ((cont = skip_digest_body(f)) < 0);
-
- TEST("END BODY lp=%ld next=%ld\n", digest.dg_lpos, ftell(f));
-
- return cont;
- }
-
- #define BACKUP_LINES 50 /* remember class + offset for parsed lines */
-
- #define LN_BLANK 0x01 /* blank line */
- #define LN_DASHED 0x02 /* dash line */
- #define LN_HEADER 0x04 /* (possible) header line */
- #define LN_ASTERISK 0x08 /* asterisk line (near end) */
- #define LN_END_OF 0x10 /* End of ... line */
- #define LN_TEXT 0x20 /* unclassified line */
-
-
- /*
- * skip until 'Subject: ' (or End of digest) line is found
- * then backup till start of header
- */
-
- /*
- * Tuning parameters:
- *
- * MIN_HEADER_LINES: number of known header lines that must
- * be found in a block to identify a new
- * header
- *
- * MAX_BLANKS_DASH max no of blanks on a 'dash line'
- *
- * MIN_DASHES min no of dashes on a 'dash line'
- *
- * MAX_BLANKS_ASTERISKS max no of blanks on an 'asterisk line'
- *
- * MIN_ASTERISKS min no of asterisks on an 'asterisk line'
- *
- * MAX_BLANKS_END_OF max no of blanks before "End of "
- */
-
- #define MIN_HEADER_LINES 2
- #define MAX_BLANKS_DASH 3
- #define MIN_DASHES 16
- #define MAX_BLANKS_ASTERISK 1
- #define MIN_ASTERISKS 10
- #define MAX_BLANKS_END_OF 1
-
- skip_digest_body(f)
- register FILE *f;
- {
- off_t backup_p[BACKUP_LINES];
- int line_type[BACKUP_LINES];
- register int backup_index, backup_count;
- int more_header_lines, end_or_asterisks, blanks;
- char line[1024];
- register char *cp;
- char **dg_hdr_field();
-
- #define decrease_index() \
- if (--backup_index < 0) backup_index = BACKUP_LINES - 1
-
- backup_index = -1;
- backup_count = 0;
- end_or_asterisks = 0;
-
- digest.dg_lines = 0;
-
-
- next_line:
- more_header_lines = 0;
-
- next_possible_header_line:
- digest.dg_lines++;
-
- if (++backup_index == BACKUP_LINES) backup_index = 0;
- if (backup_count < BACKUP_LINES) backup_count++;
-
- backup_p[backup_index] = ftell(f);
- line_type[backup_index] = LN_TEXT;
-
- if (fgets(line, 1024, f) == NULL) {
- TEST("end_of_file, bc=%d, lines=%d\n", backup_count, digest.dg_lines);
-
- if (is_mmdf_folder) {
- digest.dg_lpos = backup_p[backup_index];
- is_mmdf_folder = 0;
- return 0;
- }
-
- /* end of file => look for "****" or "End of" line */
-
- if (end_or_asterisks)
- while (--backup_count >= 0) {
- --digest.dg_lines;
- decrease_index();
- if (line_type[backup_index] & (LN_ASTERISK | LN_END_OF)) break;
- }
-
- if (digest.dg_lines == 0) return 0;
-
- while (--backup_count >= 0) {
- --digest.dg_lines;
- digest.dg_lpos = backup_p[backup_index];
- decrease_index();
- if ((line_type[backup_index] &
- (LN_ASTERISK | LN_END_OF | LN_BLANK | LN_DASHED)) == 0)
- break;
- }
-
- return 0; /* no article follows */
- }
-
- TEST("\n>>%-.50s ==>>", line, 0);
-
- if (line[0] == '\001' && strcmp(line, "\001\001\001\001\n") == 0) {
- digest.dg_lpos = backup_p[backup_index];
- if (!is_mmdf_folder) fseek(f, digest.dg_lpos, 0);
- --digest.dg_lines;
- is_mmdf_folder = 0;
- return (digest.dg_lines <= 0) ? -1 : 1;
- }
-
- if (is_mmdf_folder) goto next_line;
-
- for (cp = line; *cp && isascii(*cp) && isspace(*cp); cp++);
-
- if (*cp == NUL) {
- TEST("BLANK", 0, 0);
- line_type[backup_index] = LN_BLANK;
- goto next_line;
- }
-
- blanks = cp - line;
-
- if (*cp == '-') {
- if (blanks > MAX_BLANKS_DASH) goto next_line;
-
- while (*cp == '-') cp++;
- if (cp - line - blanks > MIN_DASHES) {
- while (*cp && (*cp == '-' || (isascii(*cp) && isspace(*cp)))) cp++;
- if (*cp == NUL) {
- TEST("DASHED", 0, 0);
-
- line_type[backup_index] = LN_DASHED;
- }
-
- }
- goto next_line;
- }
-
- if (*cp == '*') {
- if (blanks > MAX_BLANKS_ASTERISK) goto next_line;
-
- while (*cp == '*') cp++;
- if (cp - line - blanks > MIN_ASTERISKS) {
- while (*cp && (*cp == '*' || (isascii(*cp) && isspace(*cp)))) cp++;
- if (*cp == NUL) {
- TEST("ASTERISK", 0, 0);
- line_type[backup_index] = LN_ASTERISK;
- end_or_asterisks++;
- }
- }
- goto next_line;
- }
-
- if (blanks <= MAX_BLANKS_END_OF &&
- *cp == 'E' && strncmp(cp, "End of ", 7) == 0) {
- TEST("END_OF_", 0, 0);
- line_type[backup_index] = LN_END_OF;
- end_or_asterisks++;
- goto next_line;
- }
-
- if (blanks == 0) {
- if (dg_hdr_field(line, 0)) {
- TEST("HEADER", 0, 0);
-
- line_type[backup_index] = LN_HEADER;
- if (++more_header_lines < MIN_HEADER_LINES)
- goto next_possible_header_line;
-
- /* found block with MIN_HEADER_LINES */
-
- /* search for beginning of header */
-
- TEST("\nSearch for start of header\n", 0, 0);
-
- for (;;) {
- fseek(f, backup_p[backup_index], 0);
- --digest.dg_lines;
- if (--backup_count == 0) break;
- decrease_index();
- if ((line_type[backup_index] & (LN_HEADER | LN_TEXT)) == 0)
- break;
- }
-
- if (digest.dg_lines == 0) {
- TEST("Skipped empty article\n", 0, 0);
- return -1;
- }
-
- for (;;) {
- digest.dg_lpos = backup_p[backup_index];
- if (--backup_count < 0) break;
- decrease_index();
- if ((line_type[backup_index] & (LN_BLANK | LN_DASHED)) == 0)
- break;
- --digest.dg_lines;
- }
-
- return (digest.dg_lines == 0) ? -1 : 1;
- }
- goto next_possible_header_line;
- }
-
- goto next_line;
- }
-
-
- parse_digest_header(f, all, hdrbuf)
- FILE *f;
- int all;
- news_header_buffer hdrbuf;
- {
- extern char *parse_header(), **dg_hdr_field();
-
- digest.dg_date = digest.dg_from = digest.dg_subj = digest.dg_to = NULL;
-
- parse_header(f, dg_hdr_field, all, hdrbuf);
-
- return digest.dg_from || digest.dg_subj;
- }
-
-
- static char **dg_hdr_field(lp, all)
- register char *lp;
- int all;
- {
-
- #define check(name, lgt, field) \
- if (isascii(lp[lgt]) && isspace(lp[lgt]) && strncmp(name, lp, lgt) == 0) {\
- TEST("MATCH: field ", 0, 0); \
- return &digest.field; \
- }
-
-
- TEST("\nPARSE[%.20s] ==>> ", lp, 0);
-
- switch (*lp++) {
-
- case '\001':
- if (!is_mmdf_folder && strncmp(lp, "\001\001\001\n", 4) == 0) {
- is_mmdf_folder = 1;
- digest.dg_hpos += 5;
- return NULL;
- }
- break;
-
- case 'D':
- case 'd':
- check("ate:", 4, dg_date);
- break;
-
- case 'F':
- case 'f':
- check("rom:", 4, dg_from);
- break;
-
- case 'R':
- case 'r':
- if (!all) break;
- check("e:", 2, dg_subj);
- break;
-
- case 'S':
- case 's':
- check("ubject:", 7, dg_subj);
- check("ubject", 6, dg_subj);
- break;
-
- case 'T':
- case 't':
- check("itle:", 5, dg_subj);
- if (!all) break;
- check("o:", 2, dg_to);
- break;
- }
-
- #undef check
- TEST("NOT MATCHED ", 0, 0);
-
- return NULL;
- }
-