home *** CD-ROM | disk | FTP | other *** search
/ Education Sampler 1992 [NeXTSTEP] / Education_1992_Sampler.iso / Programming / Source / WAIS / ir / irfiles.h < prev    next >
Encoding:
C/C++ Source or Header  |  1992-02-02  |  4.9 KB  |  157 lines

  1. /* WIDE AREA INFORMATION SERVER SOFTWARE:
  2.    No guarantees or restrictions.  See the readme file for the full standard
  3.    disclaimer.
  4.  
  5.    Brewster@think.com
  6. */
  7.  
  8. /* include file for irfiles.c */
  9.  
  10. #ifndef IRFILES_H
  11. #define IRFILES_H
  12.  
  13. #include "cdialect.h"
  14. #include "cutil.h"
  15. #include "hutil.h" /* for word_memory_hashtable */
  16. #include "ustubs.h" /* for time_t */
  17.  
  18. /* Filename extensions for the various component files of a database; each macro is a string literal that includes the leading dot. */
  19. #define dictionary_ext            ".dct"
  20. #define filename_table_ext        ".fn"
  21. #define headline_table_ext        ".hl"
  22. #define document_table_ext        ".doc"
  23. #define index_ext            ".inv"  /* presumably "inverted file", cf. the "moved to inverted file" note on the dictionary constants -- confirm */
  24. #define source_ext             ".src"  /* presumably the file handled by write_src_structure() -- confirm in irfiles.c */
  25.  
  26. /* Dictionary layout constants; these definitions are used in irhash, irverify, and irfiles. */
  27. #define DICTIONARY_HEADER_SIZE 4  /* units not stated here; presumably bytes -- TODO confirm */
  28. #define DICTIONARY_BLOCK_SIZE 1000L  /* in entries, not bytes */
  29. #define DICTIONARY_ENTRY_HASH_CODE_SIZE 2
  30. /* #define DICTIONARY_ENTRY_COUNT_SIZE 3  moved to inverted file */
  31. /* #define DICTIONARY_ENTRY_INDEX_BLOCK_SIZE 4 not used and too long a symbol */
  32. #define DICTIONARY_ELEMENT_SIZE 6 /* was 9; NOTE(review): 6 = 2 (hash code) + 4 (retired index-block size), i.e. 9 minus the 3-byte count that moved out -- derivation inferred from the values, confirm */
  33. #define DICTIONARY_SIZE 524288L  /* 2^19; presumably counted in entries like database.dictionary_size -- TODO confirm */
  34. /* Layout constants for index blocks. Sizes are presumably byte counts -- TODO confirm against irfiles.c. */
  35. #define INDEX_HEADER_SIZE 4
  36. #define INDEX_BLOCK_SIZE_SIZE 2
  37. #define NEXT_INDEX_BLOCK_SIZE 4
  38. #define INDEX_BLOCK_FLAG_SIZE 1
  39. #define INDEX_BLOCK_HEADER_SIZE 7  /* equals 1 + 4 + 2, the sum of INDEX_BLOCK_FLAG_SIZE, NEXT_INDEX_BLOCK_SIZE and INDEX_BLOCK_SIZE_SIZE; maintained by hand */
  40. #define NUMBER_OF_OCCURANCES_SIZE 4  /* sic: "OCCURANCES" is the spelling used throughout this header */
  41. #define INDEX_BLOCK_NOT_FULL_FLAG 101  /* the three *_FLAG values are distinct; presumably stored in the 1-byte block flag -- confirm */
  42. #define INDEX_BLOCK_FULL_FLAG 69
  43. #define INDEX_BLOCK_DICTIONARY_FLAG 123
  44. /* Field sizes that make up one index element (presumably bytes -- TODO confirm). */
  45. #define DOCUMENT_ID_SIZE 4
  46. #define WORD_POSITION_SIZE 0  /* zero, so this field contributes nothing to INDEX_ELEMENT_SIZE */
  47. #define CHARACTER_POSITION_SIZE 3
  48. #define WEIGHT_SIZE 1
  49. #define INDEX_ELEMENT_SIZE 8  /* equals 4 + 0 + 3 + 1, the sum of the four sizes above; maintained by hand */
  50. /* State for one database: a name, one stdio stream per component file, and bookkeeping used while building. */
  51. typedef struct database {
  52.     char*    database_file;  /* presumably the base name the *_ext suffixes are added to -- confirm in irfiles.c */
  53.     FILE*    dictionary_stream;
  54.     FILE*    filename_table_stream;
  55.     FILE*    headline_table_stream;
  56.     FILE*    document_table_stream;
  57.     FILE*    index_stream;
  58.     word_memory_hashtable* the_word_memory_hashtable;  /* type comes from hutil.h (see the include comment) */
  59.     long    doc_table_allocated_entries;  /* NOTE(review): meaning not visible in this header; presumably document-table entries allocated so far */
  60.     long    dictionary_size;  /* in entries */
  61.     long     number_of_words; /* for building.  number of different words */
  62.     long    index_file_number; /* for building. */
  63. } database;
  64. /* One record of the document table, as passed to read_document_table_entry() and write_document_table_entry(). */
  65. typedef struct document_table_entry {
  66.     long    filename_id;  /* presumably a position in the filename table (cf. read_filename_table_entry's "position") -- confirm */
  67.     long    headline_id;  /* presumably a position in the headline table (cf. read_headline_table_entry's "position") -- confirm */
  68.     long    start_character;  /* NOTE(review): presumably offsets within the source file; inclusive/exclusive not stated here */
  69.     long    end_character;
  70.     long     document_length; /* in characters */
  71.     long    number_of_lines; /* in lines */
  72.     time_t  date;            /* 0 if unknown */
  73. } document_table_entry;
  74.  
  75. #ifdef __cplusplus
  76. /* declare these as C style functions */
  77. extern "C"
  78.     {
  79. #endif /* def __cplusplus */
  80. /* Database lifecycle. Every argument list goes through _AP(...), which is not defined in this file (presumably supplied by cdialect.h for pre-ANSI compilers -- TODO confirm). */
  81. database*     openDatabase _AP((char* name, boolean initialize,boolean for_search));  /* returns a database*; failure value and the exact effect of the two flags are not visible here */
  82. void        closeDatabase _AP((database* the_db));
  83. void        disposeDatabase _AP((database* the_db));  /* NOTE(review): how dispose differs from close is not visible from this header */
  84.  
  85. void initialize_index_files _AP((database* db));
  86. /* Filename-table and headline-table access. The write_* functions return long and the read_* functions take a long position; presumably the same value -- confirm in irfiles.c. */
  87. char *read_filename_table_entry _AP((long position, 
  88.                   char* filename,  /* presumably an output buffer; required size not visible here */
  89.                   char* type, 
  90.                   time_t* file_write_date,
  91.                   database* db));
  92.  
  93. long write_filename_table_entry _AP((char* filename, char *type, database* db));
  94. boolean filename_in_database _AP((char *filename, char *type,
  95.                   time_t *write_file_date, database *db));  /* note: named write_file_date here, file_write_date in read_filename_table_entry */
  96. char *read_headline_table_entry _AP((long position,database* db));  /* NOTE(review): ownership of the returned char* is not stated in this header */
  97. long write_headline_table_entry _AP((char* headline, database* db));
  98.  
  99. /* Document-table access; records are document_table_entry structs passed by pointer. */
  100. boolean read_document_table_entry 
  101.   _AP((document_table_entry* doc_entry,long number,database* db));  /* presumably fills *doc_entry with entry `number` and reports success -- confirm */
  102.  
  103. long write_document_table_entry
  104.   _AP((document_table_entry* doc_table_entry, database* db));  /* NOTE(review): meaning of the long result is not visible here */
  105.  
  106. long next_document_id _AP((database* db));
  107.  
  108. /* Dictionary-file operations. Meanings of the long return values are not stated in this header -- see irfiles.c. */
  109. void close_dictionary_file _AP((database *db));
  110.  
  111. long add_word_to_dictionary
  112.      _AP((char *word, long index_file_block_number, long number_of_occurances,
  113.      database* db));
  114. long look_up_word_in_dictionary _AP((char *word,database* db));  /* NOTE(review): the not-found value is not visible here */
  115. long init_dict_file_for_writing _AP((database *db));
  116. void init_dict_file_detailed _AP((FILE* dictionary_stream,
  117.                   long number_of_blocks));  /* takes a raw stream instead of a database*, unlike its neighbours */
  118.  
  119. /* Source-description (.src) support. NOTE(review): what "register" does with filename is not visible from this header. */
  120. boolean register_src_structure _AP((char *filename));
  121. boolean write_src_structure _AP((char *filename,  /* presumably writes the source description for a database to filename -- confirm in irfiles.c; returns boolean */
  122.                  char *database_name,
  123.                  char *type_name,  /* formerly spelled "typename": that is a reserved word in C++, and this header is wrapped in extern "C" for C++ includers; prototype parameter names do not affect callers or the definition */
  124.                  char **filenames,  /* presumably number_of_filename entries -- confirm */
  125.                  long number_of_filename,
  126.                  boolean export_database,
  127.                  long tcp_port));
  128.  
  129. /* Index-block allocation on a raw stream. NOTE(review): units of how_large and meaning of the long result are not visible here; presumably a block position in the stream. */
  130. long allocate_index_block _AP((long how_large, FILE* stream));
  131. /* Dictionary-block reading, per-entry accessors (entry index i within an unsigned char block), and debug printers. */
  132. unsigned char *read_dictionary_block _AP((unsigned char* block,
  133.                       long position,long length,
  134.                       FILE* stream));  /* presumably reads into the caller's block and returns it -- confirm; units of length not stated */
  135.                                       
  136. void print_dictionary _AP((database* db));
  137. char *dictionary_block_word _AP((long i,unsigned char* block));  /* NOTE(review): ownership and lifetime of the returned char* are not stated here */
  138. long dictionary_block_position _AP((long i,unsigned char* block));
  139. long dictionary_block_word_occurances _AP((long i,unsigned char* block));
  140. void print_dictionary_block _AP((unsigned char* block,long size));
  141.  
  142. /* database functions: one filename builder per component file. Each takes a caller-supplied destination and a database and returns char*; presumably destination, filled with the database name plus the matching *_ext -- confirm; required buffer size is not visible here. */
  143. char* dictionary_filename _AP((char* destination, database* db));
  144. char* filename_table_filename _AP((char* destination, database* db));
  145. char* headline_table_filename _AP((char* destination, database* db));
  146. char* document_table_filename _AP((char* destination, database* db));
  147. char* index_filename _AP((char* destination, database* db));
  148. char* index_filename_with_version _AP((long version, char* destination, 
  149.                   database* db));  /* the only builder with an extra argument; how version appears in the name is not visible here */
  150. char* source_filename _AP((char* destination, database* db));
  151.  
  152. #ifdef __cplusplus
  153.     }
  154. #endif /* def __cplusplus */
  155.  
  156. #endif /* IRFILES_H */
  157.