Usenet 1994 January

home *** CD-ROM | disk | FTP | other *** search

/ Usenet 1994 January / usenetsourcesnewsgroupsinfomagicjanuary1994.iso / sources / unix / volume22 / nn6.4 / part17 / digest.c < prev

Wrap

C/C++ Source or Header | 1990-06-07 | 7.4 KB | 363 lines

/*
 *  (c) Copyright 1990, Kim Fabricius Storm.  All rights reserved.
 *
 *  Digest article handling
 *
 *  Locates the individual articles inside a digest (or MMDF style folder)
 *  by scanning for header blocks, and records their file offsets in the
 *  global `digest' structure.
 */

#include "config.h"
#include "news.h"
#include "debug.h"

/*
 * TEST(fmt, x, y) prints a trace line when the DG_TEST debug bit is set.
 * It is wrapped in do/while(0) so that it behaves as a single statement
 * (a bare `if' would capture a following `else').
 */
#ifdef DG_TEST
#define TEST(fmt, x, y) \
    do { if (Debug & DG_TEST) printf(fmt, x, y); } while (0)
#else
#define TEST(fmt, x, y) do { } while (0)
#endif

/* or'ing an ASCII letter with 040 maps upper case onto lower case */
#define UNIFY 040

/* the part of "digest" following the initial 'd' (already lower case) */
static char digest_pattern[] = "igest";

/* set by dg_hdr_field() when an MMDF delimiter line (^A^A^A^A) is seen */
static int is_mmdf_folder = 0;

/*
 * Forward declarations.
 *
 * dg_hdr_field() is defined `static' further down; it must be declared
 * static *before* its first use, since a block scope declaration without
 * a visible prior declaration would give it external linkage and clash
 * with the later static definition.
 */
static char **dg_hdr_field();
int parse_digest_header();
int skip_digest_body();

/*
 * Normalize digest_pattern by or'ing UNIFY into every character.
 * With the pattern above this changes nothing, but it keeps the pattern
 * consistent with the unified comparison done in is_digest().
 * Always returns 0.
 */
int
init_digest_parsing()
{
    register char *m;

    for (m = digest_pattern; *m; m++)
        *m |= UNIFY;

    return 0;
}

/*
 * Return 1 if the subject of the current article (news.ng_subj) contains
 * the word "digest" in any mix of upper and lower case, 0 otherwise
 * (also when there is no subject).
 */
int
is_digest()
{
    register char *subject;
    register char c, *q, *m;

    if ((subject = news.ng_subj) == NULL)
        return 0;

    while ((c = *subject++) != '\0') {
        if ((c | UNIFY) != ('d' | UNIFY))
            continue;

        /* found a 'd' or 'D': compare what follows against "igest" */
        q = subject;
        m = digest_pattern;
        while ((c = *m++) && (*q++ | UNIFY) == c)
            ;
        if (c == NUL)
            return 1;   /* whole pattern was matched */
    }

    return 0;
}

/*
 * Read the next article of a digest.
 *
 * Expects that f is positioned at the header of an article.
 *
 * Records the header position (dg_hpos) and the position following the
 * header (dg_fpos) in `digest'; skip_digest_body() records the rest.
 * Articles for which skip_digest_body() returns a negative value are
 * skipped and the following header is parsed instead.
 *
 * Returns -1 if no usable header is found, otherwise the non-negative
 * result of skip_digest_body(): 1 if another article follows, 0 if not.
 */
int
get_digest_article(f, hdrbuf)
FILE *f;
news_header_buffer hdrbuf;
{
    int cont;

    digest.dg_hpos = ftell(f);
    TEST("GET DIGEST hp=%ld\n", digest.dg_hpos, 0);

    do {
        if (!parse_digest_header(f, 0, hdrbuf))
            return -1;
        digest.dg_fpos = ftell(f);
        TEST("END HEADER hp=%ld fp=%ld\n", digest.dg_hpos, digest.dg_fpos);
    } while ((cont = skip_digest_body(f)) < 0);

    TEST("END BODY lp=%ld next=%ld\n", digest.dg_lpos, ftell(f));

    return cont;
}

#define BACKUP_LINES    50      /* remember class + offset for parsed lines */

#define LN_BLANK        0x01    /* blank line */
#define LN_DASHED       0x02    /* dash line */
#define LN_HEADER       0x04    /* (possible) header line */
#define LN_ASTERISK     0x08    /* asterisk line (near end) */
#define LN_END_OF       0x10    /* End of ... line */
#define LN_TEXT         0x20    /* unclassified line */

/*
 * skip until 'Subject: ' (or End of digest) line is found
 * then backup till start of header
 */

/*
 * Tuning parameters:
 *
 *  MIN_HEADER_LINES:       number of known header lines that must
 *                          be found in a block to identify a new
 *                          header
 *
 *  MAX_BLANKS_DASH         max no of blanks on a 'dash line'
 *
 *  MIN_DASHES              a 'dash line' must start with MORE than
 *                          this number of dashes
 *
 *  MAX_BLANKS_ASTERISK     max no of blanks on an 'asterisk line'
 *
 *  MIN_ASTERISKS           an 'asterisk line' must start with MORE
 *                          than this number of asterisks
 *
 *  MAX_BLANKS_END_OF       max no of blanks before "End of "
 */

#define MIN_HEADER_LINES        2
#define MAX_BLANKS_DASH         3
#define MIN_DASHES              16
#define MAX_BLANKS_ASTERISK     1
#define MIN_ASTERISKS           10
#define MAX_BLANKS_END_OF       1

/*
 * Skip the body of the current digest article.
 *
 * Lines are read from f and classified (LN_*); the class and file offset
 * of the most recent BACKUP_LINES lines are kept in a circular buffer so
 * that, once the start of the next header (or end of file) has been
 * recognized, the scan can back up over the header lines and over
 * trailing blank/dashed separator lines.
 *
 * On return digest.dg_lpos holds the offset where the body ends and
 * digest.dg_lines the number of lines counted for it; when a following
 * header was found, f is repositioned at its first line.
 *
 * Returns:
 *      1   another article follows
 *      0   end of file, no article follows
 *     -1   the article was empty and should be skipped
 */
int
skip_digest_body(f)
register FILE *f;
{
    off_t backup_p[BACKUP_LINES];       /* offset of each remembered line */
    int line_type[BACKUP_LINES];        /* LN_* class of each remembered line */
    register int backup_index, backup_count;
    int more_header_lines, end_or_asterisks, blanks;
    char line[1024];
    register char *cp;

/* step backwards in the circular buffer */
#define decrease_index() \
    do { \
        if (--backup_index < 0) backup_index = BACKUP_LINES - 1; \
    } while (0)

    backup_index = -1;
    backup_count = 0;
    end_or_asterisks = 0;
    digest.dg_lines = 0;

 next_line:
    /* a non-header line breaks any block of header lines */
    more_header_lines = 0;

 next_possible_header_line:
    digest.dg_lines++;

    if (++backup_index == BACKUP_LINES)
        backup_index = 0;
    if (backup_count < BACKUP_LINES)
        backup_count++;

    backup_p[backup_index] = ftell(f);
    line_type[backup_index] = LN_TEXT;

    if (fgets(line, sizeof line, f) == NULL) {
        TEST("end_of_file, bc=%d, lines=%d\n", backup_count, digest.dg_lines);

        if (is_mmdf_folder) {
            digest.dg_lpos = backup_p[backup_index];
            is_mmdf_folder = 0;
            return 0;
        }

        /* end of file => look for "****" or "End of" line */

        if (end_or_asterisks)
            while (--backup_count >= 0) {
                --digest.dg_lines;
                decrease_index();
                if (line_type[backup_index] & (LN_ASTERISK | LN_END_OF))
                    break;
            }

        if (digest.dg_lines == 0)
            return 0;

        /* back up over the trailing separator lines */
        while (--backup_count >= 0) {
            --digest.dg_lines;
            digest.dg_lpos = backup_p[backup_index];
            decrease_index();
            if ((line_type[backup_index] &
                 (LN_ASTERISK | LN_END_OF | LN_BLANK | LN_DASHED)) == 0)
                break;
        }

        return 0;       /* no article follows */
    }

    TEST("\n>>%-.50s ==>>", line, 0);

    /* MMDF delimiter line: four ^A characters */
    if (line[0] == '\001' && strcmp(line, "\001\001\001\001\n") == 0) {
        digest.dg_lpos = backup_p[backup_index];
        if (!is_mmdf_folder)
            fseek(f, digest.dg_lpos, 0);        /* 0: from start of file */
        --digest.dg_lines;
        is_mmdf_folder = 0;
        return (digest.dg_lines <= 0) ? -1 : 1;
    }

    /* inside an MMDF message only the delimiter line is significant */
    if (is_mmdf_folder)
        goto next_line;

    for (cp = line; *cp && isascii(*cp) && isspace(*cp); cp++)
        ;

    if (*cp == NUL) {
        TEST("BLANK", 0, 0);
        line_type[backup_index] = LN_BLANK;
        goto next_line;
    }

    blanks = cp - line;

    if (*cp == '-') {
        if (blanks > MAX_BLANKS_DASH)
            goto next_line;

        while (*cp == '-')
            cp++;
        if (cp - line - blanks > MIN_DASHES) {
            /* the rest of the line may only hold dashes and white space */
            while (*cp && (*cp == '-' || (isascii(*cp) && isspace(*cp))))
                cp++;
            if (*cp == NUL) {
                TEST("DASHED", 0, 0);
                line_type[backup_index] = LN_DASHED;
            }
        }
        goto next_line;
    }

    if (*cp == '*') {
        if (blanks > MAX_BLANKS_ASTERISK)
            goto next_line;

        while (*cp == '*')
            cp++;
        if (cp - line - blanks > MIN_ASTERISKS) {
            /* the rest of the line may only hold asterisks and white space */
            while (*cp && (*cp == '*' || (isascii(*cp) && isspace(*cp))))
                cp++;
            if (*cp == NUL) {
                TEST("ASTERISK", 0, 0);
                line_type[backup_index] = LN_ASTERISK;
                end_or_asterisks++;
            }
        }
        goto next_line;
    }

    if (blanks <= MAX_BLANKS_END_OF &&
        *cp == 'E' && strncmp(cp, "End of ", 7) == 0) {
        TEST("END_OF_", 0, 0);
        line_type[backup_index] = LN_END_OF;
        end_or_asterisks++;
        goto next_line;
    }

    if (blanks == 0) {
        if (dg_hdr_field(line, 0)) {
            TEST("HEADER", 0, 0);

            line_type[backup_index] = LN_HEADER;
            if (++more_header_lines < MIN_HEADER_LINES)
                goto next_possible_header_line;

            /* found block with MIN_HEADER_LINES */

            /* search for beginning of header */

            TEST("\nSearch for start of header\n", 0, 0);

            for (;;) {
                fseek(f, backup_p[backup_index], 0);
                --digest.dg_lines;
                if (--backup_count == 0)
                    break;
                decrease_index();
                if ((line_type[backup_index] & (LN_HEADER | LN_TEXT)) == 0)
                    break;
            }

            if (digest.dg_lines == 0) {
                TEST("Skipped empty article\n", 0, 0);
                return -1;
            }

            /* exclude blank/dashed lines preceding the header from the body */
            for (;;) {
                digest.dg_lpos = backup_p[backup_index];
                if (--backup_count < 0)
                    break;
                decrease_index();
                if ((line_type[backup_index] & (LN_BLANK | LN_DASHED)) == 0)
                    break;
                --digest.dg_lines;
            }

            return (digest.dg_lines == 0) ? -1 : 1;
        }

        /* unindented non-header line: does not reset the header count */
        goto next_possible_header_line;
    }

    goto next_line;

#undef decrease_index
}

/*
 * Parse the header of a digest article starting at the current position
 * of f, using dg_hdr_field() to recognize the header fields.
 *
 * The date, from, subject and to fields of `digest' are reset before
 * parsing.  `all' is passed on to parse_header() and dg_hdr_field().
 *
 * Returns non-zero if a From or Subject field was found.
 */
int
parse_digest_header(f, all, hdrbuf)
FILE *f;
int all;
news_header_buffer hdrbuf;
{
    extern char *parse_header();

    digest.dg_date = digest.dg_from = digest.dg_subj = digest.dg_to = NULL;

    parse_header(f, dg_hdr_field, all, hdrbuf);

    return digest.dg_from || digest.dg_subj;
}

/*
 * Identify a header line.
 *
 * Returns the address of the `digest' field corresponding to the header
 * line lp, or NULL if the line is not a recognized header line.
 *
 * Date:, From:, Subject and Title: are always recognized; Re: (stored as
 * subject) and To: only when `all' is non-zero.
 *
 * An MMDF delimiter line (^A^A^A^A) marks the file as an MMDF folder and
 * moves the recorded header position past the 5 byte delimiter line.
 */
static char **
dg_hdr_field(lp, all)
register char *lp;
int all;
{
/*
 * Match `name' (lgt characters) followed by white space at lp.
 * strncmp() is evaluated first: only when it matches is lp known to be
 * at least lgt characters long, so lp[lgt] is never read beyond the
 * terminating NUL.
 */
#define check(name, lgt, field) \
    if (strncmp(name, lp, lgt) == 0 && \
        isascii(lp[lgt]) && isspace(lp[lgt])) { \
        TEST("MATCH: field ", 0, 0); \
        return &digest.field; \
    }

    TEST("\nPARSE[%.20s] ==>> ", lp, 0);

    switch (*lp++) {

     case '\001':
        if (!is_mmdf_folder && strncmp(lp, "\001\001\001\n", 4) == 0) {
            is_mmdf_folder = 1;
            digest.dg_hpos += 5;
            return NULL;
        }
        break;

     case 'D':
     case 'd':
        check("ate:", 4, dg_date);
        break;

     case 'F':
     case 'f':
        check("rom:", 4, dg_from);
        break;

     case 'R':
     case 'r':
        if (!all)
            break;
        check("e:", 2, dg_subj);
        break;

     case 'S':
     case 's':
        check("ubject:", 7, dg_subj);
        check("ubject", 6, dg_subj);
        break;

     case 'T':
     case 't':
        check("itle:", 5, dg_subj);
        if (!all)
            break;
        check("o:", 2, dg_to);
        break;

     default:
        break;
    }

#undef check

    TEST("NOT MATCHED ", 0, 0);

    return NULL;
}