home *** CD-ROM | disk | FTP | other *** search
- /*
- * Title: nntp/idhist.c
- * Version: 0.01 (02-Feb-1995)
- *
- * Author: Adam Goodfellow
- * Contact: adam@comptech.demon.co.uk
- *
- * A set of primatives for providing global duplicate article
- * detection for NNTP clients. This version for use in !TCPIPv2
- *
- * Copyright (c) Adam Goodfellow 1995. All Rights Reserved.
- *
- * This software is provided AS-IS. The author gives no warranty,
- * real or assumed, and takes no responsibility whatsoever for any
- * use or misuse of this software, or any damage created by its use
- * or misuse.
- *
- * This software may be freely copied and distributed provided that
- * no part of this NOTICE is deleted or edited in any manner.
- */
-
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <ctype.h>
- #include <time.h>
-
- #include "flex.h"
-
- typedef struct idhist_str
- {
- char *store; /* Pointer to ID store memory */
- int next; /* Next free location in store */
- int start; /* Start of chain of newly added IDs */
- int end; /* End of chain of newly added IDs */
- int count; /* Total number of ID in store */
- int hashsize; /* Hash table size */
- } idhist_str;
-
- typedef struct idhist_str *idhistory;
-
- time_t time_aton(char *s);
- char *time_ntoa(char *buf, time_t t);
-
- time_t time_readgmt(void);
- void time_timetoordinals(time_t t, struct tm *tp);
- time_t time_ordinalstotime(struct tm *tp);
-
- idhistory idhist_init(int size);
- void idhist_kill(idhistory hist);
- int idhist_addid(idhistory hist, char *id, time_t t);
- int idhist_checkid(idhistory hist, char *id);
- int idhist_save(idhistory hist, char *file, int maxage);
- idhistory idhist_load(char *file /*, int maxage */);
- char *idhist_trimstr(char *s);
- char *idhist_msgid(idhistory hist, int id, char *buf);
- int idhist_numids(idhistory hist);
-
- static int alignup(int n, int bits);
- static int hashof(idhistory hist, char *id);
- static int ensure_store(idhistory hist, int n);
-
-
- /*
- * The ID store consist of a hash table followed
- * by structures as follows:
- * int Next ID in hash chain
- * int Next ID in chonological order
- * time_t The ID's time stamp
- * char[] The actual ID string
- */
- #define ID_CHAIN 0 /* Next object on hash chain */
- #define ID_NEXT 4 /* Next object as added */
- #define ID_TIME 8 /* Time of news feed that this object was fetched */
- #define ID_STRING 12 /* Actual ID string without the "<" and ">" */
-
- #define ID_HASH(h) (h*4) /* Another legibility aid */
-
- /*
- * Align a number up to a power of 2
- */
- static int alignup(int n, int bits)
- {
- return n-1+(1<<bits) & (0xffffffff<<bits);
- }
-
- /*
- * Calculate a hash value of an
- * article ID string
- */
- static int hashof(idhistory hist, char *id)
- {
- int h;
-
- for (h=0; *id; id++)
- h = ((h<<6)+*id) % hist->hashsize;
-
- return h;
- }
-
- /*
- * Initialise the article ID store
- * Hash table size is selected based upon starting
- * number of IDs given in size
- */
- idhistory idhist_init(int size)
- {
- int i, *p;
- idhistory hist;
-
- /*
- * Prime numbers for hash table sizes
- */
- int hashsizetab[] =
- { 0x0101, 0x0209, 0x0407, 0x0805, 0x1003, 0x2011, 0x401b, 0 };
-
- if (hist = (idhistory)malloc(sizeof(idhist_str)), hist==NULL)
- return NULL;
-
- /*
- * From the size (ie number of id in file) work out a suitable
- * hash table size. The size chosen that which is at least
- * size/2, or the highest in the hash sizes list above.
- */
- for (i=0; hashsizetab[i]!=0 && hashsizetab[i]<size; i++);
- if (i) --i;
- hist->hashsize = hashsizetab[i];
-
- /*
- * Allocate some memory, rounded up to a multiple of 1K
- */
- if (!flex_alloc((flex_ptr)&hist->store, alignup(hist->hashsize*4,10)))
- {
- idhist_kill(hist);
- return NULL;
- }
-
- /*
- * clear the hash table
- */
- for (i = 0, p = (int *)hist->store; i<hist->hashsize; i++)
- p[i] = 0;
-
- /*
- * Set the next store marker and
- * initialise list offsets
- */
- hist->next = hist->hashsize*4;
- hist->start = 0;
- hist->end = 0;
- hist->count = 0;
-
- return hist;
- }
-
-
- /*
- * Ensure that the total ID store size
- * is at least n.
- */
- static int ensure_store(idhistory hist, int n)
- {
- if (n>=flex_size((flex_ptr)&hist->store))
- return flex_extend((flex_ptr)&hist->store, alignup(n, 10));
-
- return 1;
- }
-
-
- /*
- * Add an ID to the store.
- * Returns <0 if ID allready present.
- * =0 if cat add it
- * >0 if added the id
- * t is time of news feed starting
- */
- int idhist_addid(idhistory hist, char *id, time_t t)
- {
- int p, h, n;
-
- if (hist==NULL)
- return 0;
-
- h = hashof(hist, id);
-
- /*
- * Scan an ID chain looking for an ID
- * If a duplicate exists, it is more likely to be near the head
- */
- for (p = *(int *)(hist->store+ID_HASH(h)); p!=0; p = *(int *)(hist->store+p+ID_CHAIN))
- {
- if (!strcmp(id, hist->store+p+ID_STRING))
- {
- /*
- * If time stamp is zero, then update this entry
- * with the given time stamp
- */
- if (*(int *)(hist->store+p+ID_TIME)==0)
- {
- *(int *)(hist->store+p+ID_TIME) = t;
- if (t!=0)
- hist->count += 1;
- }
- return -p;
- }
- }
-
- /*
- * Word aligned length of ID
- */
- n = alignup(strlen(id)+1, 2);
-
- /*
- * Ensure space for ID we are adding
- */
- if (!ensure_store(hist, hist->next+n+ID_STRING))
- return 0;
-
- /*
- * Copy in the ID with a empty chain link
- */
- *(int *)(hist->store+hist->next) = 0;
- strcpy(hist->store+hist->next+ID_STRING,id);
-
- /*
- * Now link into the hash chain
- * Optimisation: usually, a duplicate will be a fairly recently
- * Stored ID, so allways link at head of a chain rather than end
- * This also saves scanning for end of chain
- */
- p = *(int *)(hist->store+ID_HASH(h));
- *(int *)(hist->store+ID_HASH(h)) = hist->next;
-
- if (p)
- *(int *)(hist->store+hist->next+ID_CHAIN) = p;
-
- /*
- * Now link onto global chain and store the time
- * to keep in order when they are saved
- */
- *(int *)(hist->store+hist->next+ID_NEXT) = 0;
- *(int *)(hist->store+hist->next+ID_TIME) = t;
-
- if (!hist->start)
- {
- hist->start = hist->next;
- hist->end = hist->start;
- }
- else
- {
- *(int *)(hist->store+hist->end+ID_NEXT) = hist->next;
- hist->end = hist->next;
- }
-
- /*
- * Update end marker
- * And return original end marker
- */
- p = hist->next;
- hist->next += (n+ID_STRING);
-
- /*
- * Only count IDs with non zero timestamps
- */
- hist->count += (t!=0);
-
- return p;
- }
-
-
- /*
- * Check if allready have the given ID
- */
- int idhist_checkid(idhistory hist, char *id)
- {
- int h, p;
-
- if (hist==NULL)
- return 0;
-
- h = hashof(hist, id);
-
- /*
- * Scan an ID chain looking for an ID
- * If a duplicate exists, it is more likely to be near the head
- */
- for (p = *(int *)(hist->store+ID_HASH(h)); p!=0; p = *(int *)(hist->store+p+ID_CHAIN))
- {
- if (!strcmp(id, hist->store+p+ID_STRING))
- return 1;
- }
-
- return 0;
- }
-
- /*
- * Return the ID string associate with an ID handle
- */
- char *idhist_msgid(idhistory hist, int id, char *buf)
- {
- int n;
-
- if (!hist || id==0)
- return 0;
-
- if (!buf)
- {
- n = strlen(hist->store+id+ID_STRING);
- if (buf = (char *)malloc(n+1), buf==NULL)
- return NULL;
- }
-
- strcpy(buf, hist->store+id+ID_STRING);
- return buf;
- }
-
-
- /*
- * Extract IDs from ID store in order they were added
- * and write to the history file.
- */
- int idhist_save(idhistory hist, char *file, int maxage)
- {
- FILE *fp;
- int p;
- time_t t, last_t = 0;
- time_t oldest;
- int count;
- char buf[256];
- struct tm tm;
-
- if (hist==NULL)
- return 0;
-
- if (fp = fopen(file, "w"), fp==NULL)
- return 0;
-
- /*
- * IDs older than this are expired
- */
- oldest = time_readgmt()-(maxage*60*60);
-
- /*
- * Count the number of IDs that will be written
- */
- count = 0;
- for (p = hist->start; p!=0; p = *(int *)(hist->store+p+ID_NEXT))
- {
- if (*(int *)(hist->store+p+ID_TIME)>=oldest)
- ++count;
- }
-
- /*
- * Write out the number of IDs for hash size selection when re-loading
- */
- fprintf(fp, "# size %d\n", count);
-
- /*
- * Write out IDs and fetch times
- */
- for (p = hist->start; p!=0; p = *(int *)(hist->store+p+ID_NEXT))
- {
- /*
- * If time stamp of this ID not same as previous one
- * write out a new time stamp, unless it is zero
- */
- if (t = *(int *)(hist->store+p+ID_TIME), t>=oldest)
- {
- if (t!=last_t)
- {
- time_timetoordinals(t, &tm);
- fprintf(fp, "# time %.2d%.2d%.2d %.2d%.2d%.2d\n",
- tm.tm_year, tm.tm_mon+1, tm.tm_mday,
- tm.tm_hour, tm.tm_min, tm.tm_sec);
- last_t = t;
- }
- /*
- * Dont write out IDs with a zero time stamp
- * or timestamp older than maxage
- */
- if (last_t>=oldest)
- {
- /* Copy string out due to danger of flex block moving
- * when fprintf() is called.
- */
- idhist_msgid(hist, p, buf);
- fprintf(fp, "%s\n", buf);
- }
- }
- }
-
- fclose(fp);
-
- return 1;
- }
-
- /*
- * Dispose of the ID store
- */
- void idhist_kill(idhistory hist)
- {
- if (hist)
- {
- if (hist->store!=NULL)
- flex_free((flex_ptr)&hist->store);
- free(hist->store);
- }
- }
-
-
- /*
- * Load ID store with ids from a file
- * ignoring 'expired' IDs. ie ID older than maxage hours
- */
- idhistory idhist_load(char *file /*, int maxage */)
- {
- char *p;
- FILE *fp;
- time_t t = 0;
- /*
- time_t oldest;
- */
- char buf[256];
-
- idhistory hist;
-
- /*
- * IDs older than this are expired
- */
- /*
- oldest = time_readgmt()-(maxage*60*60);
- */
- if (fp = fopen(file, "r"), fp==NULL)
- return 0;
-
- /*
- * If a new style history, the first line will
- * contain the number if IDs in the history file
- *
- * Use this value to allow init() to work out the has table size.
- * For old history files, this line wont present, so init with a
- * default size.
- *
- * As this size represent the pre-expirey size, then after expirely,
- * the hash table will still usually be adequate to efficiently
- * accomodate new IDs.
- */
- if (fgets(buf, 255, fp)!=NULL && !strncmp(buf+2, "# size ", 7))
- {
- /*
- * New style history
- */
- hist = idhist_init(atoi(buf+7));
- }
- else
- {
- /*
- * Old style history
- */
- rewind(fp);
- hist = idhist_init(0);
- }
-
- if (hist==NULL)
- return NULL;
-
- while (fgets(buf, 255, fp)!=NULL)
- {
- if (*buf=='#')
- {
- if (!strncmp(buf+2, "time ", 5))
- t = time_aton(buf+7);
- /*
- * Old style history file time stamp in the form "# yymmdd hhmmss"
- */
- else
- t = time_aton(buf+2);
- }
- else
- {
- /*
- * Expirey process consists of ignoring IDs that are
- * stamped with a time/date prior to 'oldest'.
- */
- /*
- if (t>=oldest)
- {
- */
- p = idhist_trimstr(buf);
- if (!idhist_checkid(hist, p))
- {
- if (!idhist_addid(hist, p, t))
- {
- fclose(fp);
- idhist_kill(hist);
- return NULL;
- }
- }
- /*
- }
- */
- }
- }
-
- fclose(fp);
-
- return hist;
- }
-
- /*
- * Skip junk at start of ID
- * Strip junk off end of ID
- * Return pointer to start of ID
- * Done in place, so original string is 'corrupted'
- */
- char *idhist_trimstr(char *s)
- {
- char *p;
-
- if (*s=='<')
- ++s;
-
- for (p = s; *p!='>' && *p>' '; p++);
- *p = '\0';
-
- return s;
- }
-
- int idhist_numids(idhistory hist)
- {
- if (!hist)
- return -1;
-
- return hist->count;
- }
-
- /*
- * Convert string with time in form yymmdd hhmmss into
- * a ANSI C time_t value
- */
- time_t time_aton(char *s)
- {
- struct tm tm;
-
- while (*s && !isdigit(*s))
- ++s;
-
- tm.tm_year = (s[0]-'0')*10 + (s[1]-'0');
- tm.tm_mon = ((s[2]-'0')*10 + (s[3]-'0'))-1;
- tm.tm_mday = (s[4]-'0')*10 + (s[5]-'0');
- tm.tm_hour = (s[7]-'0')*10 + (s[8]-'0');
- tm.tm_min = (s[9]-'0')*10 + (s[10]-'0');
- tm.tm_sec = (s[11]-'0')*10 + (s[12]-'0');
-
- tm.tm_isdst = 0;
- tm.tm_wday = 0;
- tm.tm_yday = 0;
-
- return time_ordinalstotime(&tm);
- }
-
- /*
- * Convert time into string yymmdd hhmmss
- * t will usually be in GMT
- * buf should be at least 16 chars
- */
- char *time_ntoa(char *buf, time_t t)
- {
- struct tm tm;
-
- time_timetoordinals(t, &tm);
-
- sprintf(buf, "%.2d%.2d%.2d %.2d%.2d%.2d",
- tm.tm_year%100, tm.tm_mon+1, tm.tm_mday,
- tm.tm_hour, tm.tm_min, tm.tm_sec);
-
- return buf;
- }
-