home *** CD-ROM | disk | FTP | other *** search
- /* $Header: mt-process.c,v 4.3.3.3 91/01/18 19:13:20 davison Trn $
- **
- ** $Log: mt-process.c,v $
- ** Revision 4.3.3.3 91/01/18 19:13:20 davison
- ** Removed the code that tried to exclude certain message ids. Added -s option
- **
- ** Revision 4.3.3.2 90/08/20 16:40:31 davison
- ** Added check of caught_interrupt flag into main loops.
- **
- ** Revision 4.3.3.1 90/07/28 18:04:45 davison
- ** Initial Trn Release
- **
- */
-
- #include "EXTERN.h"
- #include "common.h"
- #include "mthreads.h"
- #ifdef SERVER
- #include "server.h"
- #endif
-
- #include <time.h>
- #ifndef TZSET
- # include <sys/timeb.h>
- #endif
-
- char buff[1024];            /* General scratch buffer: holds server commands and replies, article file names, and the header line currently being parsed. */
-
- char references[1024];      /* References text of the article being processed; filled in by process_articles() and consumed (destructively) by insert_article(). */
-
- char subject_str[80];       /* Cleaned-up subject of the current article, built by get_subject_str() (at most 72 characters plus the terminator). */
- bool found_Re;              /* Set by get_subject_str() when a leading "Re:" was stripped; cleared for each article in process_articles(). */
-
- char author_str[20];        /* Shortened author name of the current article, built by get_author_str() (at most 16 characters plus the terminator). */
-
- extern int log_verbosity, slow_down;  /* Defined elsewhere.  log_verbosity gates the log_error() calls in this file; slow_down is the sleep() argument used between HEAD requests in SERVER builds. */
-
- DOMAIN *next_domain;        /* Domain that expire() will visit next; free_domain() advances it when that domain is freed.  Reset to Null when expire() finishes normally. */
-
- /* Forward declarations (K&R style, no prototypes) for the routines defined below. */
- void insert_article(), expire(), trim_roots(), order_roots(), trim_authors();
- void make_root(), use_root(), merge_roots(), set_root(), unlink_root();
- void link_child(), unlink_child();
- void free_article(), free_domain(), free_subject(), free_root(), free_author();
- void get_subject_str(), get_author_str();
- ARTICLE *get_article();
- SUBJECT *new_subject();
- AUTHOR *new_author();
-
- #ifdef TZSET
- extern time_t tnow;         /* Current time (defined elsewhere); passed to getdate() when parsing Date headers. */
- #else
- extern struct timeb ftnow;  /* Current time and timezone (defined elsewhere); passed to getdate() when parsing Date headers. */
- #endif
-
- #ifndef SERVER
- static FILE *fp_article;    /* Article file currently open in non-SERVER builds; insert_article() also reads it when searching cited text for a reference. */
- #endif
-
- /* Given the upper/lower bounds of the articles in the current group, add all
- ** the ones that we don't know about and remove all the ones that have expired.
- ** The current directory must be the newgroup's spool directory.
- */
- void
- process_articles( first_article, last_article )
- ART_NUM first_article, last_article;
- {
- register char *cp, *str;
- register ARTICLE *article;
- register ART_NUM i;
- time_t date;
- int len;
- #ifdef SERVER
- bool orig_extra = extra_expire;
- #endif
- extern int errno;
- extern int sys_nerr;
- extern char *sys_errlist[];
-
- if( first_article > (i = total.last+1) ) {
- i = first_article;
- }
- added_count = last_article - i + 1;
- expired_count = 0;
-
- for( ; i <= last_article; i++ ) {
- if( caught_interrupt ) {
- return;
- }
- #ifdef SERVER
- if( slow_down ) {
- sleep( slow_down );
- }
- sprintf( buff, "HEAD %ld", (long)i );
- put_server( buff );
- if( get_server( buff, sizeof buff ) < 0 || *buff == CHAR_FATAL ) {
- last_article = i - 1;
- extra_expire = FALSE;
- break;
- }
- if( *buff != CHAR_OK ) {
- added_count--;
- continue;
- }
- #else
- /* Open article in current directory. */
- sprintf( buff, "%ld", (long)i );
- /* Set errno for purely paranoid reasons */
- errno = 0;
- if( (fp_article = fopen( buff, "r" )) == Nullfp ) {
- /* Missing files are ok -- they've just been expired or canceled */
- if( errno != 0 && errno != ENOENT ) {
- if( errno < 0 || errno > sys_nerr ) {
- log_error( "Can't open `%s': Error %d.\n", buff, errno );
- } else {
- log_error( "Can't open `%s': %s.\n", buff,
- sys_errlist[errno] );
- }
- }
- added_count--;
- continue;
- }
- #endif
-
- article = Nullart;
- *references = '\0';
- *author_str = '\0';
- *subject_str = '\0';
- found_Re = 0;
- date = 0;
-
- #ifdef SERVER
- while( get_server( cp = buff, sizeof buff ) == 0 ) {
- process_line:
- if( *cp == '.' ) {
- break;
- }
- #else
- while( (cp = fgets( buff, sizeof buff, fp_article )) != Nullch ) {
- process_line:
- if( *cp == '\n' ) { /* check for end of header */
- break; /* break out when found */
- }
- #endif
- if( (unsigned char)*cp <= ' ' ) { /* skip continuation lines */
- continue; /* (except references -- see below) */
- }
- if( (str = index( cp, ':' )) == Nullch ) {
- break; /* end of header if no colon found */
- }
- if( (len = str - cp) > 10 ) {
- continue; /* skip keywords > 10 chars */
- }
- #ifndef SERVER
- cp[strlen(cp)-1] = '\0'; /* remove newline */
- #endif
- while( cp < str ) { /* lower-case the keyword */
- if( (unsigned char)*cp <= ' ' ) { /* stop at any whitespace */
- break;
- }
- if( isupper(*cp) ) {
- *cp = tolower(*cp);
- }
- cp++;
- }
- *cp = '\0';
- cp = buff;
- if( len == 4 && strEQ( cp, "date" ) ) {
- #ifdef TZSET
- date = getdate( str + 1, tnow, timezone );
- #else
- date = getdate( str + 1, ftnow.time, (long) ftnow.timezone );
- #endif
- } else
- if( len == 4 && strEQ( cp, "from" ) ) {
- get_author_str( str + 1 );
- } else
- if( len == 7 && strEQ( cp, "subject" ) ) {
- get_subject_str( str + 1 );
- } else
- if( len == 10 && strEQ( cp, "message-id" ) ) {
- if( !article ) {
- article = get_article( str + 1 );
- } else {
- if( log_verbosity ) {
- log_error( "Found multiple Message-IDs! [%ld].\n",
- (long)i );
- }
- }
- } else
- if( len == 10 && strEQ( cp, "references" ) ) {
- /* include preceding space in saved reference */
- len = strlen( str + 1 );
- bcopy( str + 1, references, len + 1 );
- str = references + len;
- /* check for continuation lines */
- #ifdef SERVER
- while( get_server( cp = buff, sizeof buff ) == 0 ) {
- #else
- while( (cp = fgets( buff, sizeof buff, fp_article )) != Nullch ) {
- #endif
- if( *cp != ' ' && *cp != '\t' ) {
- goto process_line;
- }
- while( *++cp == ' ' || *cp == '\t' ) {
- ;
- }
- *--cp = ' ';
- /* If the references are too long, shift them over to
- ** always save the most recent ones.
- */
- if( (len += strlen( cp )) > 1023 ) {
- strcpy( buff, buff + len - 1023 );
- str -= len - 1023;
- len = 1023;
- }
- strcpy( str, cp );
- }/* while */
- break;
- }/* if */
- }/* while */
- if( article ) {
- insert_article( article, date, i );
- } else {
- if( log_verbosity ) {
- log_error( "Message-ID line missing! [%ld].\n", (long)i );
- }
- }
- #ifndef SERVER
- fclose( fp_article );
- #endif
- }
-
- if( extra_expire || first_article > total.first ) {
- expire( first_article );
- }
- if( caught_interrupt ) {
- return;
- }
- trim_roots();
- order_roots();
- trim_authors();
-
- total.first = first_article;
- total.last = last_article;
- #ifdef SERVER
- extra_expire = orig_extra;
- #endif
- }
-
- /* Search all articles for numbers less than new_first. Traverse the list
- ** using the domain links so we don't have to deal with the tree structure.
- ** If extra_expire is true, stat() all valid articles to make sure they are
- ** really there and expire them if they're not.
- */
- void
- expire( new_first )
- ART_NUM new_first;
- {
- register DOMAIN *domain;
- register ARTICLE *article, *next_art, *hold;
-
- for( domain = &unk_domain; domain; domain = next_domain ) {
- next_domain = domain->link;
- for( article = domain->ids; article; article = next_art ) {
- if( caught_interrupt ) {
- return;
- }
- next_art = article->id_link;
- if( !article->subject || (article->flags & NEW_ARTICLE) ) {
- continue;
- }
- if( extra_expire && article->num >= new_first ) {
- #ifdef SERVER
- sprintf( buff, "STAT %ld", (long)article->num );
- put_server( buff );
- if( get_server( buff, sizeof buff ) == 0 && *buff == CHAR_OK ) {
- continue;
- }
- #else
- sprintf( buff, "%ld", (long)article->num );
- if( !stat( buff, &filestat ) || errno != ENOENT ) {
- continue;
- }
- #endif
- }
- if( extra_expire || article->num < new_first ) {
- article->subject->count--;
- article->subject = 0;
- article->author->count--;
- article->author = 0;
- /* Free expired article if it has no children. Then check
- ** if the parent(s) are also fake and can be freed. We'll
- ** free any empty roots later.
- */
- while( !article->children ) {
- hold = article->parent;
- unlink_child( article );
- free_article( article );
- if( hold && !hold->subject ) {
- if( (article = hold) == next_art ) {
- next_art = next_art->id_link;
- }
- } else {
- break;
- }
- }
- expired_count++;
- }/* if */
- }/* for */
- }/* for */
- next_domain = Null(DOMAIN*);
- }
-
- /* Trim the article chains down so that we don't have more than one faked
- ** article between the root any real ones.
- */
- void
- trim_roots()
- {
- register ROOT *root, *last_root;
- register ARTICLE *article, *next;
- register SUBJECT *subject, *last_subj;
- register int found;
-
- #ifndef lint
- last_root = (ROOT *)&root_root;
- #else
- last_root = Null(ROOT*);
- #endif
- for( root = root_root; root; root = last_root->link ) {
- for( article = root->articles; article; article = article->siblings ) {
- /* If an article has no subject, it is a "fake" reference node.
- ** If all of its immediate children are also fakes, delete it
- ** and graduate the children to the root. If everyone is fake,
- ** the chain dies.
- */
- while( !article->subject ) {
- found = 0;
- for( next = article->children; next; next = next->siblings ) {
- if( next->subject ) {
- found = 1;
- break;
- }
- }
- if( !found ) {
- /* Remove this faked article and move all its children
- ** up to the root.
- */
- next = article->children;
- unlink_child( article );
- free_article( article );
- for( article = next; article; article = next ) {
- next = article->siblings;
- article->parent = Nullart;
- link_child( article );
- }
- article = root->articles; /* start this root over */
- } else {
- break; /* else, on to next article */
- }
- }
- }
- /* Free all unused subject strings. Begin by trying to find a
- ** subject for the root's pointer.
- */
- for( subject = root->subjects; subject && !subject->count; subject = root->subjects ) {
- root->subjects = subject->link;
- free_subject( subject );
- root->subject_cnt--;
- }
- /* Then free up any unsed intermediate subjects.
- */
- if( (last_subj = subject) != Null(SUBJECT*) ) {
- while( (subject = subject->link) != Null(SUBJECT*) ) {
- if( !subject->count ) {
- last_subj->link = subject->link;
- free_subject( subject );
- root->subject_cnt--;
- subject = last_subj;
- } else {
- last_subj = subject;
- }
- }
- }
- /* Now, free all roots without articles. Flag unexpeced errors.
- */
- if( !root->articles ) {
- if( root->subjects ) {
- log_error( "** Empty root still had subjects remaining! **\n" );
- }
- last_root->link = root->link;
- free_root( root );
- } else {
- last_root = root;
- }
- }
- }
-
- /* Descend the author list, find any author names that aren't used
- ** anymore and free them.
- */
- void
- trim_authors()
- {
- register AUTHOR *author, *last_author;
-
- #ifndef lint
- last_author = (AUTHOR *)&author_root;
- #else
- last_author = Null(AUTHOR*);
- #endif
- for( author = author_root; author; author = last_author->link ) {
- if( !author->count ) {
- last_author->link = author->link;
- free_author( author );
- } else {
- last_author = author;
- }
- }
- }
-
- /* Reorder the roots to place the oldest ones first (age determined by
- ** date of oldest article).
- */
- void
- order_roots()
- {
- register ROOT *root, *next, *search;
-
- /* If we don't have at least two roots, we're done! */
- if( !(root = root_root) || !(next = root->link) ) {
- return; /* RETURN */
- }
- /* Break the old list off after the first root, and then start
- ** inserting the roots into the list by date.
- */
- root->link = Null(ROOT*);
- while( (root = next) != Null(ROOT*) ) {
- next = next->link;
- if( (search = root_root)->articles->date >= root->articles->date ) {
- root->link = root_root;
- root_root = root;
- } else {
- while( search->link
- && search->link->articles->date < root->articles->date ) {
- search = search->link;
- }
- root->link = search->link;
- search->link = root;
- }
- }
- }
-
- #define EQ(x,y) ((isupper(x) ? tolower(x) : (x)) == (y))
-
- /* Parse the subject into 72 characters or less. Remove any "Re[:^]"s from
- ** the front (noting that it's there), and any "(was: old)" stuff from
- ** the end. Then, compact multiple whitespace characters into one space,
- ** trimming leading/trailing whitespace. If it's still too long, unmercifully
- ** cut it off. We don't bother with subject continuation lines either.
- */
- void
- get_subject_str( str )
- register char *str;
- {
- register char *cp;
- register int len;
-
- while( *str && (unsigned char)*str <= ' ' ) {
- str++;
- }
- if( !*str ) {
- bcopy( "<None>", subject_str, 7 );
- return; /* RETURN */
- }
- cp = str;
- while( EQ( cp[0], 'r' ) && EQ( cp[1], 'e' ) ) { /* check for Re: */
- cp += 2;
- if( *cp == '^' ) { /* allow Re^2: */
- while( *++cp <= '9' && *cp >= '0' ) {
- ;
- }
- }
- if( *cp != ':' ) {
- break;
- }
- while( *++cp == ' ' ) {
- ;
- }
- found_Re = 1;
- str = cp;
- }
- /* Remove "(was Re: oldsubject)", because we already know the old subjects.
- ** Also match "(Re: oldsubject)". Allow possible spaces after the ('s.
- */
- for( cp = str; (cp = index( cp+1, '(' )) != Nullch; ) {
- while( *++cp == ' ' ) {
- ;
- }
- if( EQ( cp[0], 'w' ) && EQ( cp[1], 'a' ) && EQ( cp[2], 's' )
- && (cp[3] == ':' || cp[3] == ' ') )
- {
- *--cp = '\0';
- break;
- }
- if( EQ( cp[0], 'r' ) && EQ( cp[1], 'e' )
- && ((cp[2]==':' && cp[3]==' ') || (cp[2]=='^' && cp[4]==':')) ) {
- *--cp = '\0';
- break;
- }
- }
- /* Copy subject to a temporary string, compacting multiple spaces/tabs */
- for( len = 0, cp = subject_str; len < 72 && *str; len++ ) {
- if( (unsigned char)*str <= ' ' ) {
- while( *++str && (unsigned char)*str <= ' ' ) {
- ;
- }
- *cp++ = ' ';
- } else {
- *cp++ = *str++;
- }
- }
- if( cp[-1] == ' ' ) {
- cp--;
- }
- *cp = '\0';
- }
-
- /* Try to fit the author name in 16 bytes. Use the comment portion in
- ** parenthesis if present. Cut off non-commented names at the '@' or '%'.
- ** Then, put as many characters as we can into the 16 bytes, packing multiple
- ** whitespace characters into a single space.
- ** We should really implement a nice name shortening algorithm, or simply
- ** grab the name packing code from nn.
- */
- void
- get_author_str( str )
- char *str;
- {
- register char *cp, *cp2;
-
- if( (cp = index( str, '(' )) != Nullch ) {
- str = cp+1;
- if( (cp = rindex( str, ')' )) != Nullch ) {
- *cp = '\0';
- }
- } else {
- if( (cp = index( str, '@' )) != Nullch ) {
- *cp = '\0';
- }
- if( (cp = index( str, '%' )) != Nullch ) {
- *cp = '\0';
- }
- }
- for( cp = str, cp2 = author_str; *cp && cp2-author_str < 16; ) {
- /* Pack white space and turn ctrl-chars into spaces. */
- if( *cp <= ' ' ) {
- while( *++cp && *cp <= ' ' ) {
- ;
- }
- if( cp2 != author_str ) {
- *cp2++ = ' ';
- }
- } else {
- *cp2++ = *cp++;
- }
- }
- *cp2 = '\0';
- }
-
- /* Take a message-id and see if we already know about it. If so, return it.
- ** If not, create it. We separate the id into its id@domain parts, and
- ** link all the unique ids to one copy of the domain portion. This saves
- ** a bit of space.
- */
- ARTICLE *
- get_article( msg_id )
- char *msg_id;
- {
- register DOMAIN *domain;
- register ARTICLE *article;
- register char *cp, *after_at;
-
- /* Take message id, break it up into <id@domain>, and try to match it.
- */
- while( *msg_id == ' ' ) {
- msg_id++;
- }
- cp = msg_id + strlen( msg_id ) - 1;
- if( msg_id >= cp ) {
- if( log_verbosity ) {
- log_error( "Message-ID is empty!\n" );
- }
- return Nullart;
- }
- if( *msg_id++ != '<' ) {
- if( log_verbosity ) {
- log_error( "Message-ID doesn't start with '<'.\n" );
- }
- msg_id--;
- }
- if( *cp != '>' ) {
- if( log_verbosity ) {
- log_error( "Message-ID doesn't end with '>'.\n" );
- }
- cp++;
- }
- *cp = '\0';
- if( msg_id == cp ) {
- if( log_verbosity ) {
- log_error( "Message-ID is null!\n" );
- }
- return Nullart;
- }
-
- if( (after_at = index( msg_id, '@' )) == Nullch ) {
- domain = &unk_domain;
- } else {
- *after_at++ = '\0';
- for( cp = after_at; *cp; cp++ ) {
- if( isupper(*cp) ) {
- *cp = tolower(*cp); /* lower-case domain portion */
- }
- }
- *cp = '\0';
- /* Try to find domain name in database. */
- for( domain = unk_domain.link; domain; domain = domain->link ) {
- if( strEQ( domain->name, after_at ) ) {
- break;
- }
- }
- if( !domain ) { /* if domain doesn't exist, create it */
- register int len = cp - after_at + 1;
- domain = (DOMAIN *)safemalloc( sizeof (DOMAIN) );
- total.domain++;
- domain->name = safemalloc( len );
- total.string2 += len;
- bcopy( after_at, domain->name, len );
- domain->ids = Nullart;
- domain->link = unk_domain.link;
- unk_domain.link = domain;
- }
- }
- /* Try to find id in this domain. */
- for( article = domain->ids; article; article = article->id_link ) {
- if( strEQ( article->id, msg_id ) ) {
- break;
- }
- }
- if( !article ) { /* If it doesn't exist, create an article */
- register int len = strlen( msg_id ) + 1;
- article = (ARTICLE *)safemalloc( sizeof (ARTICLE) );
- bzero( article, sizeof (ARTICLE) );
- total.article++;
- article->num = 0;
- article->id = safemalloc( len );
- total.string2 += len;
- bcopy( msg_id, article->id, len );
- article->domain = domain;
- article->id_link = domain->ids;
- domain->ids = article;
- }
- return article;
- }
-
- /* Take all the data we've accumulated about the article and shove it into
- ** the article tree at the best place we can possibly imagine.
- */
- void
- insert_article( article, date, num )
- ARTICLE *article;
- time_t date;
- ART_NUM num;
- {
- register ARTICLE *node, *last;
- register char *cp, *end;
- int len;
-
- if( article->subject ) {
- if( log_verbosity ) {
- log_error( "We've already seen article #%ld (%s@%s)\n",
- (long)num, article->id, article->domain->name );
- }
- return; /* RETURN */
- }
- article->date = date;
- article->num = num;
- article->flags = NEW_ARTICLE;
-
- if( !*references && found_Re ) {
- if( log_verbosity > 1 ) {
- log_error( "Missing reference line! [%ld]\n", (long)num );
- }
- }
- /* If the article has a non-zero root, it is already in a thread somewhere.
- ** Unlink it to try to put it in the best possible spot.
- */
- if( article->root ) {
- /* Check for a real or shared-fake parent. Articles that have never
- ** existed have a num of 0. Expired articles that remain as references
- ** have a valid num. (Valid date too, but no subject.)
- */
- for( node = article->parent;
- node && !node->num && node->child_cnt == 1;
- node = node->parent )
- {
- ;
- }
- unlink_child( article );
- if( node ) { /* do we have decent parents? */
- /* Yes: assume that our references are ok, and just reorder us
- ** with our siblings by date.
- */
- link_child( article );
- use_root( article, article->root );
- /* Freshen the date in any faked parent articles. */
- for( node = article->parent;
- node && !node->num && date < node->date;
- node = node->parent )
- {
- node->date = date;
- unlink_child( node );
- link_child( node );
- }
- return; /* RETURN */
- }
- /* We'll assume that this article has as good or better references
- ** than the child that faked us initially. Free the fake reference-
- ** chain and process our references as usual.
- */
- for( node = article->parent; node; node = node->parent ) {
- unlink_child( node );
- free_article( node );
- }
- article->parent = Nullart; /* neaten up */
- article->siblings = Nullart;
- }
- check_references:
- if( !*references ) { /* If no references but "Re:" in subject, */
- if( found_Re ) { /* search for a reference in any cited text */
- #ifndef SERVER
- for( len = 4; len && fgets( buff, sizeof buff, fp_article ); len-- ) {
- if( (cp = index( buff, '<' )) && (end = index( cp, ' ' )) ) {
- if( end[-1] == ',' ) {
- end--;
- }
- *end = '\0';
- if( (end = index( cp, '>' )) == Nullch ) {
- end = cp + strlen( cp ) - 1;
- }
- if( valid_message_id( cp, end ) ) {
- strcpy( references+1, cp );
- *references = ' ';
- if( log_verbosity > 2 ) {
- log_error( "Found cited-text reference: '%s' [%ld]\n",
- references+1, (long)num );
- }
- break;
- }
- }
- }
- #endif
- } else {
- article->flags |= ROOT_ARTICLE;
- }
- }
- /* If we have references, process them from the right end one at a time
- ** until we either run into somebody, or we run out of references.
- */
- if( *references ) {
- last = article;
- node = Nullart;
- end = references + strlen( references ) - 1;
- while( (cp = rindex( references, ' ' )) != Nullch ) {
- *cp++ = '\0';
- while( end >= cp && ((unsigned char)*end <= ' ' || *end == ',') ) {
- end--;
- }
- end[1] = '\0';
- /* Quit parsing references if this one is garbage. */
- if( !valid_message_id( cp, end ) ) {
- if( log_verbosity ) {
- log_error( "Bad ref '%s' [%ld]\n", cp, (long)num );
- }
- break;
- }
- /* Dump all domains that end in '.', such as "..." & "1@DEL." */
- if( end[-1] == '.' ) {
- break;
- }
- node = get_article( cp );
- /* Check for duplicates on the reference line. Brand-new data has
- ** no date. Data we just allocated earlier on this line has a
- ** date but no root. Special-case the article itself, since it
- ** MIGHT have a root.
- */
- if( (node->date && !node->root) || node == article ) {
- if( log_verbosity ) {
- log_error( "Reference line contains duplicates [%ld]\n",
- (long)num );
- }
- if( (node = last) == article ) {
- node = Nullart;
- }
- continue;
- }
- last->parent = node;
- link_child( last );
- if( node->root ) {
- break;
- }
- node->date = date;
- last = node;
- end = cp-2;
- }
- if( !node ) {
- *references = '\0';
- goto check_references;
- }
- /* Check if we ran into anybody that was already linked. If so, we
- ** just use their root.
- */
- if( node->root ) {
- /* See if this article spans the gap between what we thought
- ** were two different roots.
- */
- if( article->root && article->root != node->root ) {
- merge_roots( node->root, article->root );
- /* Set the roots of any children we brought with us. */
- set_root( article, node->root );
- }
- use_root( article, node->root );
- } else {
- /* We didn't find anybody we knew, so either create a new root or
- ** use the article's root if it was previously faked.
- */
- if( !article->root ) {
- make_root( node );
- use_root( article, node->root );
- } else {
- use_root( article, article->root );
- node->root = article->root;
- link_child( node );
- }
- }
- /* Set the roots of the faked articles we created as references. */
- for( node = article->parent; node && !node->root; node = node->parent ) {
- node->root = article->root;
- }
- /* Make sure we didn't circularly link to a child article(!), by
- ** ensuring that we run into the root before we run into ourself.
- */
- while( node && node->parent != article ) {
- node = node->parent;
- }
- if( node ) {
- /* Ugh. Someone's tweaked reference line with an incorrect
- ** article order arrived first, and one of our children is
- ** really one of our ancestors. Cut off the bogus child branch
- ** right where we are and link it to the root.
- */
- if( log_verbosity ) {
- log_error("Found ancestral child -- fixing.\n");
- }
- unlink_child( node );
- node->parent = Nullart;
- link_child( node );
- }
- } else {
- /* The article has no references. Either turn it into a new root, or
- ** re-attach fleshed-out (previously faked) article to its old root.
- */
- if( !article->root ) {
- make_root( article );
- } else {
- use_root( article, article->root );
- link_child( article );
- }
- }
- }
-
- /* Check if the string we've found looks like a valid message-id reference.
- */
- int
- valid_message_id( start, end )
- register char *start, *end;
- {
- char *mid;
-
- if( *end != '>' ) {
- /* Compensate for spacecadets who include the header in their
- ** subsitution of all '>'s into another citation character.
- */
- if( *end == '<' || *end == '-' || *end == '!' || *end == '%'
- || *end == ')' || *end == '|' || *end == ':' || *end == '}'
- || *end == '*' || *end == '+' || *end == '#' || *end == ']'
- || *end == '@' ) {
- if( log_verbosity ) {
- log_error( "Reference ended in '%c'.\n", *end );
- }
- *end = '>';
- }
- }
- /* Id must be "<...@...>" */
- if( *start != '<' || *end != '>' || (mid = index( start, '@' )) == Nullch
- || mid == start+1 || mid+1 == end ) {
- return 0; /* RETURN */
- }
- return 1;
- }
-
- /* Remove an article from its parent/siblings. Leave parent pointer intact.
- */
- void
- unlink_child( child )
- register ARTICLE *child;
- {
- register ARTICLE *last;
-
- if( !(last = child->parent) ) {
- child->root->thread_cnt--;
- if( (last = child->root->articles) == child ) {
- child->root->articles = child->siblings;
- } else {
- goto sibling_search;
- }
- } else {
- last->child_cnt--;
- if( last->children == child ) {
- last->children = child->siblings;
- } else {
- last = last->children;
- sibling_search:
- while( last->siblings != child ) {
- last = last->siblings;
- }
- last->siblings = child->siblings;
- }
- }
- }
-
- /* Link an article to its parent article. If its parent pointer is zero,
- ** link it to its root. Sorts siblings by date.
- */
- void
- link_child( child )
- register ARTICLE *child;
- {
- register ARTICLE *node;
- register ROOT *root;
-
- if( !(node = child->parent) ) {
- root = child->root;
- root->thread_cnt++;
- node = root->articles;
- if( !node || child->date < node->date ) {
- child->siblings = node;
- root->articles = child;
- } else {
- goto sibling_search;
- }
- } else {
- node->child_cnt++;
- node = node->children;
- if( !node || child->date < node->date ) {
- child->siblings = node;
- child->parent->children = child;
- } else {
- sibling_search:
- for( ; node->siblings; node = node->siblings ) {
- if( node->siblings->date > child->date ) {
- break;
- }
- }
- child->siblings = node->siblings;
- node->siblings = child;
- }
- }
- }
-
- /* Create a new root for the specified article. If the current subject_str
- ** matches any pre-existing root's subjects, we'll instead add it on as a
- ** parallel thread.
- */
- void
- make_root( article )
- ARTICLE *article;
- {
- register ROOT *new, *node;
- register SUBJECT *subject;
-
- #ifndef NO_SUBJECT_MATCHING
- /* First, check the other root's subjects for a match. */
- for( node = root_root; node; node = node->link ) {
- for( subject = node->subjects; subject; subject = subject->link ) {
- if( subject_equal( subject->str, subject_str ) ) {
- use_root( article, node ); /* use it instead */
- link_child( article );
- return; /* RETURN */
- }
- }
- }
- #endif
-
- /* Create a new root. */
- new = (ROOT *)safemalloc( sizeof (ROOT) );
- total.root++;
- new->articles = article;
- new->root_num = article->num;
- new->thread_cnt = 1;
- if( article->num ) {
- article->author = new_author();
- new->subject_cnt = 1;
- new->subjects = article->subject = new_subject();
- } else {
- new->subject_cnt = 0;
- new->subjects = Null(SUBJECT*);
- }
- article->root = new;
- new->link = root_root;
- root_root = new;
- }
-
- /* Add this article's subject onto the indicated root's list. Point the
- ** article at the root.
- */
- void
- use_root( article, root )
- ARTICLE *article;
- ROOT *root;
- {
- register SUBJECT *subject;
- register ROOT *root2;
- SUBJECT *hold, *child_subj = Null(SUBJECT*);
- ARTICLE *node;
-
- article->root = root;
-
- /* If it's a fake, there's no subject to add. */
- if( !article->num ) {
- return; /* RETURN */
- }
-
- /* If we haven't picked a unique message number to represent this root,
- ** use the first non-zero number we encounter. Which one doesn't matter.
- */
- if( !root->root_num ) {
- root->root_num = article->num;
- }
- article->author = new_author();
-
- /* Check if the new subject matches any of the other subjects in this root.
- ** If so, we just update the count. If not, check all the other roots for
- ** a match. If found, the new subject is common between the two roots, so
- ** we merge the two roots together.
- */
- root2 = root;
- #ifndef NO_SUBJECT_MATCHING
- do {
- #endif
- for( subject = root2->subjects; subject; subject = subject->link ) {
- if( subject_equal( subject->str, subject_str ) ) {
- article->subject = subject;
- subject->count++;
- #ifndef NO_SUBJECT_MATCHING
- if( root2 != root ) {
- merge_roots( root, root2 );
- }
- #endif
- return; /* RETURN */
- }
- }
- #ifndef NO_SUBJECT_MATCHING
- if( (root2 = root2->link) == Null(ROOT*) ) {
- root2 = root_root;
- }
- } while( root2 != root );
- #endif
-
- article->subject = hold = new_subject();
- root->subject_cnt++;
-
- /* Find subject of any pre-existing children. We want to insert the new
- ** subject before a child's to keep the subject numbering intuitive
- ** in the newsreader.
- */
- for( node = article->children; node; node = node->children ) {
- if( node->subject ) {
- child_subj = node->subject;
- break;
- }
- }
- if( !(subject = root->subjects) || subject == child_subj ) {
- hold->link = root->subjects;
- root->subjects = hold;
- } else {
- while( subject->link && subject->link != child_subj ) {
- subject = subject->link;
- }
- hold->link = subject->link;
- subject->link = hold;
- }
- }
-
/* Check subjects in a case-insignificant, punctuation ignoring manner.
** Every run of spaces and/or punctuation counts as a single space, and
** upper-case letters compare equal to their lower-case forms.
**
** Characters are converted to unsigned char before they are handed to the
** <ctype.h> routines, so 8-bit text is safe on signed-char machines.
**
** Returns 1 if the two subjects are equivalent, otherwise 0.
*/
int
subject_equal( str1, str2 )
register char *str1, *str2;
{
    register int ch1, ch2;

    while( (ch1 = (unsigned char)*str1++) != '\0' ) {
        if( ch1 == ' ' || ispunct( ch1 ) ) {
            /* swallow the rest of the run */
            while( *str1 && (*str1 == ' ' || ispunct( (unsigned char)*str1 )) ) {
                str1++;
            }
            ch1 = ' ';
        } else if( isupper( ch1 ) ) {
            ch1 = tolower( ch1 );
        }
        if( (ch2 = (unsigned char)*str2++) == '\0' ) {
            return 0;                   /* str2 is shorter */
        }
        if( ch2 == ' ' || ispunct( ch2 ) ) {
            while( *str2 && (*str2 == ' ' || ispunct( (unsigned char)*str2 )) ) {
                str2++;
            }
            ch2 = ' ';
        } else if( isupper( ch2 ) ) {
            ch2 = tolower( ch2 );
        }
        if( ch1 != ch2 ) {
            return 0;
        }
    }
    if( *str2 ) {
        return 0;                       /* str2 is longer */
    }
    return 1;
}
-
- /* Create a new subject structure. */
- SUBJECT *
- new_subject()
- {
- register int len = strlen( subject_str ) + 1;
- register SUBJECT *subject;
-
- subject = (SUBJECT *)safemalloc( sizeof (SUBJECT) );
- total.subject++;
- subject->count = 1;
- subject->link = Null(SUBJECT*);
- subject->str = safemalloc( len );
- total.string1 += len;
- bcopy( subject_str, subject->str, len );
-
- return subject;
- }
-
- /* Create a new author structure. */
- AUTHOR *
- new_author()
- {
- register len = strlen( author_str ) + 1;
- register AUTHOR *author, *last_author;
-
- last_author = Null(AUTHOR*);
- for( author = author_root; author; author = author->link ) {
- #ifndef DONT_COMPARE_AUTHORS /* might like to define this to save time */
- if( strEQ( author->name, author_str ) ) {
- author->count++;
- return author; /* RETURN */
- }
- #endif
- last_author = author;
- }
-
- author = (AUTHOR *)safemalloc( sizeof (AUTHOR) );
- total.author++;
- author->count = 1;
- author->link = Null(AUTHOR*);
- author->name = safemalloc( len );
- total.string1 += len;
- bcopy( author_str, author->name, len );
-
- if( last_author ) {
- last_author->link = author;
- } else {
- author_root = author;
- }
- return author;
- }
-
- /* Insert all of root2 into root1, setting the proper root values and
- ** updating subject counts.
- */
- void
- merge_roots( root1, root2 )
- ROOT *root1, *root2;
- {
- register ARTICLE *node, *next;
- register SUBJECT *subject;
-
- /* Remember whoever's root num is lower. This could screw up a
- ** newsreader's kill-thread code if someone already saw the roots as
- ** being separate, but it must be done. The newsreader code will have
- ** to handle this as best as it can.
- */
- if( root1->root_num > root2->root_num ) {
- root1->root_num = root2->root_num;
- }
-
- for( node = root2->articles; node; node = next ) {
- /* For each article attached to root2, detach them, set the
- ** branch's root pointers to root1, and then attach it to root1.
- */
- next = node->siblings;
- unlink_child( node );
- node->siblings = Nullart;
- set_root( node, root1 ); /* sets children too */
- /* Link_child() depends on node->parent being null and node->root
- ** being set.
- */
- link_child( node );
- }
- root1->subject_cnt += root2->subject_cnt;
- if( !(subject = root1->subjects) ) {
- root1->subjects = root2->subjects;
- } else {
- while( subject->link ) {
- subject = subject->link;
- }
- subject->link = root2->subjects;
- }
- unlink_root( root2 );
- free_root( root2 );
- }
-
- /* When merging roots, we need to reset all the root pointers.
- */
- void
- set_root( node, root )
- ARTICLE *node;
- ROOT *root;
- {
- do {
- node->root = root;
- if( node->children ) {
- set_root( node->children, root );
- }
- } while( node = node->siblings );
- }
-
- /* Unlink a root from its neighbors. */
- void
- unlink_root( root )
- register ROOT *root;
- {
- register ROOT *node;
-
- if( (node = root_root) == root ) {
- root_root = root->link;
- } else {
- while( node->link != root ) {
- node = node->link;
- }
- node->link = root->link;
- }
- }
-
- /* Free an article and its message-id string. All other resources must
- ** already be free, and it must not be attached to any threads.
- */
- void
- free_article( this )
- ARTICLE *this;
- {
- register ARTICLE *art;
-
- if( (art = this->domain->ids) == this ) {
- if( !(this->domain->ids = this->id_link) ) {
- free_domain( this->domain );
- }
- } else {
- while( this != art->id_link ) {
- art = art->id_link;
- }
- art->id_link = this->id_link;
- }
- total.string2 -= strlen( this->id ) + 1;
- free( this->id );
- free( this );
- total.article--;
- }
-
- /* Free the domain only when its last unique id has been freed. */
- void
- free_domain( this )
- DOMAIN *this;
- {
- register DOMAIN *domain;
-
- if( this == (domain = &unk_domain) ) {
- return;
- }
- if( this == next_domain ) { /* help expire routine skip freed domains */
- next_domain = next_domain->link;
- }
- while( this != domain->link ) {
- domain = domain->link;
- }
- domain->link = this->link;
- total.string2 -= strlen( this->name ) + 1;
- free( this->name );
- free( this );
- total.domain--;
- }
-
- /* Free the subject structure and its string. */
- void
- free_subject( this )
- SUBJECT *this;
- {
- total.string1 -= strlen( this->str ) + 1;
- free( this->str );
- free( this );
- total.subject--;
- }
-
- /* Free a root. It must already be unlinked. */
- void
- free_root( this )
- ROOT *this;
- {
- free( this );
- total.root--;
- }
-
- /* Free the author structure when it's not needed any more. */
- void
- free_author( this )
- AUTHOR *this;
- {
- total.string1 -= strlen( this->name ) + 1;
- free( this->name );
- free( this );
- total.author--;
- }
-