The Datafile PD-CD 3

home *** CD-ROM | disk | FTP | other *** search

/ The Datafile PD-CD 3 / PDCD_3.iso / internet / tcpipsrc / h / if / NNTP / c / nntpidhist < prev

Wrap

Text File | 1995-02-21 | 12.0 KB | 579 lines

/* * Title: nntp/idhist.c * Version: 0.01 (02-Feb-1995) * * Author: Adam Goodfellow * Contact: adam@comptech.demon.co.uk * * A set of primatives for providing global duplicate article * detection for NNTP clients. This version for use in !TCPIPv2 * * Copyright (c) Adam Goodfellow 1995. All Rights Reserved. * * This software is provided AS-IS. The author gives no warranty, * real or assumed, and takes no responsibility whatsoever for any * use or misuse of this software, or any damage created by its use * or misuse. * * This software may be freely copied and distributed provided that * no part of this NOTICE is deleted or edited in any manner. */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <ctype.h> #include <time.h> #include "flex.h" typedef struct idhist_str { char *store; /* Pointer to ID store memory */ int next; /* Next free location in store */ int start; /* Start of chain of newly added IDs */ int end; /* End of chain of newly added IDs */ int count; /* Total number of ID in store */ int hashsize; /* Hash table size */ } idhist_str; typedef struct idhist_str *idhistory; time_t time_aton(char *s); char *time_ntoa(char *buf, time_t t); time_t time_readgmt(void); void time_timetoordinals(time_t t, struct tm *tp); time_t time_ordinalstotime(struct tm *tp); idhistory idhist_init(int size); void idhist_kill(idhistory hist); int idhist_addid(idhistory hist, char *id, time_t t); int idhist_checkid(idhistory hist, char *id); int idhist_save(idhistory hist, char *file, int maxage); idhistory idhist_load(char *file /*, int maxage */); char *idhist_trimstr(char *s); char *idhist_msgid(idhistory hist, int id, char *buf); int idhist_numids(idhistory hist); static int alignup(int n, int bits); static int hashof(idhistory hist, char *id); static int ensure_store(idhistory hist, int n); /* * The ID store consist of a hash table followed * by structures as follows: * int Next ID in hash chain * int Next ID in chonological order * time_t The ID's time stamp * char[] The actual ID string */ #define ID_CHAIN 0 /* Next object on hash chain */ #define ID_NEXT 4 /* Next object as added */ #define ID_TIME 8 /* Time of news feed that this object was fetched */ #define ID_STRING 12 /* Actual ID string without the "<" and ">" */ #define ID_HASH(h) (h*4) /* Another legibility aid */ /* * Align a number up to a power of 2 */ static int alignup(int n, int bits) { return n-1+(1<<bits) & (0xffffffff<<bits); } /* * Calculate a hash value of an * article ID string */ static int hashof(idhistory hist, char *id) { int h; for (h=0; *id; id++) h = ((h<<6)+*id) % hist->hashsize; return h; } /* * Initialise the article ID store * Hash table size is selected based upon starting * number of IDs given in size */ idhistory idhist_init(int size) { int i, *p; idhistory hist; /* * Prime numbers for hash table sizes */ int hashsizetab[] = { 0x0101, 0x0209, 0x0407, 0x0805, 0x1003, 0x2011, 0x401b, 0 }; if (hist = (idhistory)malloc(sizeof(idhist_str)), hist==NULL) return NULL; /* * From the size (ie number of id in file) work out a suitable * hash table size. The size chosen that which is at least * size/2, or the highest in the hash sizes list above. */ for (i=0; hashsizetab[i]!=0 && hashsizetab[i]<size; i++); if (i) --i; hist->hashsize = hashsizetab[i]; /* * Allocate some memory, rounded up to a multiple of 1K */ if (!flex_alloc((flex_ptr)&hist->store, alignup(hist->hashsize*4,10))) { idhist_kill(hist); return NULL; } /* * clear the hash table */ for (i = 0, p = (int *)hist->store; i<hist->hashsize; i++) p[i] = 0; /* * Set the next store marker and * initialise list offsets */ hist->next = hist->hashsize*4; hist->start = 0; hist->end = 0; hist->count = 0; return hist; } /* * Ensure that the total ID store size * is at least n. */ static int ensure_store(idhistory hist, int n) { if (n>=flex_size((flex_ptr)&hist->store)) return flex_extend((flex_ptr)&hist->store, alignup(n, 10)); return 1; } /* * Add an ID to the store. * Returns <0 if ID allready present. * =0 if cat add it * >0 if added the id * t is time of news feed starting */ int idhist_addid(idhistory hist, char *id, time_t t) { int p, h, n; if (hist==NULL) return 0; h = hashof(hist, id); /* * Scan an ID chain looking for an ID * If a duplicate exists, it is more likely to be near the head */ for (p = *(int *)(hist->store+ID_HASH(h)); p!=0; p = *(int *)(hist->store+p+ID_CHAIN)) { if (!strcmp(id, hist->store+p+ID_STRING)) { /* * If time stamp is zero, then update this entry * with the given time stamp */ if (*(int *)(hist->store+p+ID_TIME)==0) { *(int *)(hist->store+p+ID_TIME) = t; if (t!=0) hist->count += 1; } return -p; } } /* * Word aligned length of ID */ n = alignup(strlen(id)+1, 2); /* * Ensure space for ID we are adding */ if (!ensure_store(hist, hist->next+n+ID_STRING)) return 0; /* * Copy in the ID with a empty chain link */ *(int *)(hist->store+hist->next) = 0; strcpy(hist->store+hist->next+ID_STRING,id); /* * Now link into the hash chain * Optimisation: usually, a duplicate will be a fairly recently * Stored ID, so allways link at head of a chain rather than end * This also saves scanning for end of chain */ p = *(int *)(hist->store+ID_HASH(h)); *(int *)(hist->store+ID_HASH(h)) = hist->next; if (p) *(int *)(hist->store+hist->next+ID_CHAIN) = p; /* * Now link onto global chain and store the time * to keep in order when they are saved */ *(int *)(hist->store+hist->next+ID_NEXT) = 0; *(int *)(hist->store+hist->next+ID_TIME) = t; if (!hist->start) { hist->start = hist->next; hist->end = hist->start; } else { *(int *)(hist->store+hist->end+ID_NEXT) = hist->next; hist->end = hist->next; } /* * Update end marker * And return original end marker */ p = hist->next; hist->next += (n+ID_STRING); /* * Only count IDs with non zero timestamps */ hist->count += (t!=0); return p; } /* * Check if allready have the given ID */ int idhist_checkid(idhistory hist, char *id) { int h, p; if (hist==NULL) return 0; h = hashof(hist, id); /* * Scan an ID chain looking for an ID * If a duplicate exists, it is more likely to be near the head */ for (p = *(int *)(hist->store+ID_HASH(h)); p!=0; p = *(int *)(hist->store+p+ID_CHAIN)) { if (!strcmp(id, hist->store+p+ID_STRING)) return 1; } return 0; } /* * Return the ID string associate with an ID handle */ char *idhist_msgid(idhistory hist, int id, char *buf) { int n; if (!hist || id==0) return 0; if (!buf) { n = strlen(hist->store+id+ID_STRING); if (buf = (char *)malloc(n+1), buf==NULL) return NULL; } strcpy(buf, hist->store+id+ID_STRING); return buf; } /* * Extract IDs from ID store in order they were added * and write to the history file. */ int idhist_save(idhistory hist, char *file, int maxage) { FILE *fp; int p; time_t t, last_t = 0; time_t oldest; int count; char buf[256]; struct tm tm; if (hist==NULL) return 0; if (fp = fopen(file, "w"), fp==NULL) return 0; /* * IDs older than this are expired */ oldest = time_readgmt()-(maxage*60*60); /* * Count the number of IDs that will be written */ count = 0; for (p = hist->start; p!=0; p = *(int *)(hist->store+p+ID_NEXT)) { if (*(int *)(hist->store+p+ID_TIME)>=oldest) ++count; } /* * Write out the number of IDs for hash size selection when re-loading */ fprintf(fp, "# size %d\n", count); /* * Write out IDs and fetch times */ for (p = hist->start; p!=0; p = *(int *)(hist->store+p+ID_NEXT)) { /* * If time stamp of this ID not same as previous one * write out a new time stamp, unless it is zero */ if (t = *(int *)(hist->store+p+ID_TIME), t>=oldest) { if (t!=last_t) { time_timetoordinals(t, &tm); fprintf(fp, "# time %.2d%.2d%.2d %.2d%.2d%.2d\n", tm.tm_year, tm.tm_mon+1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec); last_t = t; } /* * Dont write out IDs with a zero time stamp * or timestamp older than maxage */ if (last_t>=oldest) { /* Copy string out due to danger of flex block moving * when fprintf() is called. */ idhist_msgid(hist, p, buf); fprintf(fp, "%s\n", buf); } } } fclose(fp); return 1; } /* * Dispose of the ID store */ void idhist_kill(idhistory hist) { if (hist) { if (hist->store!=NULL) flex_free((flex_ptr)&hist->store); free(hist->store); } } /* * Load ID store with ids from a file * ignoring 'expired' IDs. ie ID older than maxage hours */ idhistory idhist_load(char *file /*, int maxage */) { char *p; FILE *fp; time_t t = 0; /* time_t oldest; */ char buf[256]; idhistory hist; /* * IDs older than this are expired */ /* oldest = time_readgmt()-(maxage*60*60); */ if (fp = fopen(file, "r"), fp==NULL) return 0; /* * If a new style history, the first line will * contain the number if IDs in the history file * * Use this value to allow init() to work out the has table size. * For old history files, this line wont present, so init with a * default size. * * As this size represent the pre-expirey size, then after expirely, * the hash table will still usually be adequate to efficiently * accomodate new IDs. */ if (fgets(buf, 255, fp)!=NULL && !strncmp(buf+2, "# size ", 7)) { /* * New style history */ hist = idhist_init(atoi(buf+7)); } else { /* * Old style history */ rewind(fp); hist = idhist_init(0); } if (hist==NULL) return NULL; while (fgets(buf, 255, fp)!=NULL) { if (*buf=='#') { if (!strncmp(buf+2, "time ", 5)) t = time_aton(buf+7); /* * Old style history file time stamp in the form "# yymmdd hhmmss" */ else t = time_aton(buf+2); } else { /* * Expirey process consists of ignoring IDs that are * stamped with a time/date prior to 'oldest'. */ /* if (t>=oldest) { */ p = idhist_trimstr(buf); if (!idhist_checkid(hist, p)) { if (!idhist_addid(hist, p, t)) { fclose(fp); idhist_kill(hist); return NULL; } } /* } */ } } fclose(fp); return hist; } /* * Skip junk at start of ID * Strip junk off end of ID * Return pointer to start of ID * Done in place, so original string is 'corrupted' */ char *idhist_trimstr(char *s) { char *p; if (*s=='<') ++s; for (p = s; *p!='>' && *p>' '; p++); *p = '\0'; return s; } int idhist_numids(idhistory hist) { if (!hist) return -1; return hist->count; } /* * Convert string with time in form yymmdd hhmmss into * a ANSI C time_t value */ time_t time_aton(char *s) { struct tm tm; while (*s && !isdigit(*s)) ++s; tm.tm_year = (s[0]-'0')*10 + (s[1]-'0'); tm.tm_mon = ((s[2]-'0')*10 + (s[3]-'0'))-1; tm.tm_mday = (s[4]-'0')*10 + (s[5]-'0'); tm.tm_hour = (s[7]-'0')*10 + (s[8]-'0'); tm.tm_min = (s[9]-'0')*10 + (s[10]-'0'); tm.tm_sec = (s[11]-'0')*10 + (s[12]-'0'); tm.tm_isdst = 0; tm.tm_wday = 0; tm.tm_yday = 0; return time_ordinalstotime(&tm); } /* * Convert time into string yymmdd hhmmss * t will usually be in GMT * buf should be at least 16 chars */ char *time_ntoa(char *buf, time_t t) { struct tm tm; time_timetoordinals(t, &tm); sprintf(buf, "%.2d%.2d%.2d %.2d%.2d%.2d", tm.tm_year%100, tm.tm_mon+1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec); return buf; }