home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
ftp.muug.mb.ca
/
2014.06.ftp.muug.mb.ca.tar
/
ftp.muug.mb.ca
/
pub
/
src
/
gopher
/
gopher1.01
/
gopherd
/
Waisindex.c
< prev
next >
Wrap
C/C++ Source or Header
|
1992-06-21
|
7KB
|
287 lines
#ifdef WAISSEARCH
/* WIDE AREA INFORMATION SERVER SOFTWARE
No guarantees or restrictions. See the readme file for the full standard
disclaimer.
Brewster@think.com
Heavily hacked by Paul Lindner (lindner@boombox.micro.umn.edu)
Do you even recognize this Brewster? :-)
*/
/* When non-zero, WaisIndexQuery formats a date string for each hit. */
int ShowDate = 0;
/* Defined ahead of the ../ir includes below.
   NOTE(review): presumably tells those headers that this file is the
   search module -- confirm against the WAIS ir headers. */
#define _search_c
#include <ctype.h>
#ifndef ultrix
#include <string.h> /* for strlen() */
#endif
#include "../ir/cutil.h"
#include "../ir/irfiles.h"
#include "../ir/irlex.h"
#include "../ir/irext.h"
#include "../ir/irsearch.h"
#include "../ir/docid.h"
#include <math.h>
#include "gopherd.h"
/* Log stream; WaisIndexQuery points it at stderr when DEBUG is set and at
   /dev/null otherwise. */
FILE *logfile = NULL; /* the logfile */
/* Not referenced anywhere in this file.
   NOTE(review): presumably consumed by the WAIS logging code -- confirm. */
char *log_file_name = NULL;
/* Database name WaisIndexQuery falls back to when its caller passes NULL. */
static char *DefaultDB = "index";
/* Month names, one entry per calendar month in order (12 entries).
   The original table omitted "Aug", which left it one entry short and
   shifted September through December down by one slot. */
static char *MonthStr[] = {
"Jan", "Feb", "Mar", "Apr", "May", "June", "July", "Aug", "Sept", "Oct",
"Nov", "Dec"
};
/*
 * WaisIndexQuery -- answer a gopher index query from a WAIS database.
 *
 *   sockfd           socket the reply is written to
 *   index_directory  directory made current (via uchdir) before the
 *                    database is opened
 *   SearchWords      the query text; an empty string lists every document
 *                    in the database instead of searching
 *   new_db_name      database name, or NULL to use DefaultDB
 *   INDEXHost        host recorded on every item returned
 *   INDEXPort        port recorded on every item returned
 *   INDEXPath        prefix located in, and stripped from, each hit filename
 *
 * At most 200 hits are examined.  A hit becomes a directory entry when its
 * weight is positive and its filename does not contain ".cache".  The
 * resulting directory is written to sockfd followed by ".\r\n".
 *
 * NOTE(review): the GopherObj/GopherDirObj created here are never released
 * in this function; no destructor is visible from this file -- confirm
 * whether the caller's process lifetime makes that acceptable.
 */
void
WaisIndexQuery(sockfd, index_directory, SearchWords, new_db_name, INDEXHost, INDEXPort, INDEXPath)
  int sockfd;
  char *index_directory;
  char *SearchWords;
  char *new_db_name;
  char *INDEXHost;
  int INDEXPort;
  char *INDEXPath;
{
     database *db;
     long maxRawScore = 0;
     long normalScore;
     char *cp;
     char dateline[10];      /* "yy/yy/yy " is 9 chars plus the NUL */
     long i;
     query_parameter_type parameters;
     boolean search_result;
     static char ReturnLine[512];
     GopherDirObj *gd;
     GopherObj *gs;

     gs = GSnew();
     gd = GDnew(32);

     if (DEBUG) {
          fprintf(stderr, "IndexPath: %s\n", INDEXPath);
          logfile = stderr; /** Log wais error messages to console **/
     } else {
          logfile = ufopen("/dev/null", "w+");
     }

     if (new_db_name == NULL) {
          new_db_name = DefaultDB;
     }

     if (uchdir(index_directory)) {
          Abortoutput(sockfd, "Couldn't change to index directory...");
          return;
     }

     /* An empty query means "list everything".  The listing routine needs
        the socket and the host/port/path, not just the database name. */
     if (SearchWords != NULL && strlen(SearchWords) == 0) {
          EveryWAISdocument(sockfd, new_db_name, INDEXHost, INDEXPort, INDEXPath);
          return;
     }

     db = openDatabase(new_db_name, false, true);
     if (db == NULL) {
          /* Precision limits keep the message inside ReturnLine. */
          sprintf(ReturnLine, "Failed to open database %.200s in index dir %.200s",
                  new_db_name, index_directory);
          Abortoutput(sockfd, ReturnLine);
          writestring(sockfd, ".\r\n"); /** be polite **/
          return;
     }

     parameters.max_hit_retrieved = 256;
     set_query_parameter(SET_MAX_RETRIEVED_MASK, &parameters);

     search_result = search_for_words(SearchWords, db, 0);

     if (search_result == true) {
          /* the search went ok */
          hit best_hit;

          finished_search_word(db);
          if (DEBUG)
               printf("After finished_search\n");

          for (i = 0; i < 200; i++) {
               if (0 != next_best_hit(&best_hit, db))
                    break; /* out of hits */

               /* The first hit returned supplies the scale for normalising. */
               if (i == 0)
                    maxRawScore = best_hit.weight;

               if (best_hit.weight > 0 &&
                   strstr(best_hit.filename, ".cache") == NULL) {

                    normalScore = (long)floor((((double)best_hit.weight) /
                                               ((double)maxRawScore)) *
                                              (MAX_NORMAL_SCORE + 1));
                    if (normalScore > MAX_NORMAL_SCORE)
                         normalScore = MAX_NORMAL_SCORE;

                    /*** Strip off the first part of the path in the filename*/
                    /*** Plus it gets rid of weird automount things... ***/
                    cp = strstr(best_hit.filename, INDEXPath);
                    if (cp == NULL)
                         cp = "Error in Hostdata!";
                    else
                         cp += strlen(INDEXPath);

                    waislog(0, 99, "%s: Score %3d:%s", SearchWords, best_hit.weight, cp);

                    /** Make the outgoing string **/
                    ZapCRLF(best_hit.headline);

                    if (ShowDate) {
                         sprintf(dateline, "%.2s/%.2s/%.2s ",
                                 best_hit.date + 2, best_hit.date + 4,
                                 best_hit.date);
                    }

                    GSsetType(gs, '0');
                    GSsetTitle(gs, best_hit.headline);
                    GSsetHost(gs, INDEXHost);
                    GSsetPort(gs, INDEXPort);

                    /* Byte-range selector; the precision bounds the path so
                       the result always fits in ReturnLine. */
                    sprintf(ReturnLine, "R%d-%d-/%.480s",
                            best_hit.start_character, best_hit.end_character,
                            cp);

                    if (!MacIndex)
                         GSsetPath(gs, ReturnLine);
                    else
                         GSsetPath(gs, cp);

                    GSsetWeight(gs, best_hit.weight);
                    GDaddGS(gd, gs);
               }

               if (DEBUG) {
                    printf("%s\n", ReturnLine);
                    printf("End Byte = %d\n", best_hit.end_character);
                    printf("Doc length = %d\n", best_hit.document_length);
                    printf("#lines = %d\n", best_hit.number_of_lines);
               }
          }
     }
     else {
          /* something went awry in the search */
          LOGGopher(sockfd, "Something went wrong in the search!\r\n");
          writestring(sockfd, ".\r\n"); /*** be polite, don't screw up the client**/
          closeDatabase(db);            /* don't leave the database open on failure */
          return;
     }

     finished_best_hit(db);

     /*** Finish the output ***/
     /*** End with the period... ***/
     if (UsingHTML)
          GDtoNetHTML(gd, sockfd);
     else
          GDtoNet(gd, sockfd);

     writestring(sockfd, ".\r\n");

     /* free everything */
     closeDatabase(db);
     return;
}
EveryWAISdocument(sockfd, db, INDEXHost, INDEXPort, INDEXPath)
int sockfd;
char *db;
char *INDEXHost;
int INDEXPort;
char *INDEXPath;
{
FILE *dbcatalog;
char db_name[MAXPATHLEN];
char inputline[512];
String *Headline;
String *Filename;
int StartByte, EndByte;
GopherObj *gs;
GopherDirObj *gd;
boolean Headlineset = FALSE;
boolean DocIDset = FALSE;
gs = GSnew();
gd = GDnew();
Headline = STRnew();
Filename = STRnew();
strcpy(db_name, db);
strcat(db_name, ".cat");
dbcatalog = rfopen(db_name, "r");
while (fgets(inputline, sizeof(inputline), dbcatalog) != NULL) {
if (strncmp(inputline, "Headline: ", 10)==0) {
STRset(Headline, inputline +10);
Headlineset = TRUE;
}
else if (strncmp(inputline, "DocID: ", 7)==0) {
char *cp;
StartByte = atoi(inputline);
cp = strchr(inputline+7, ' ');
if (cp == NULL) break;
cp++;
EndByte = atoi(cp);
cp = strchr(inputline+7, ' ');
cp++;
if (cp == NULL) break;
cp =strstr(cp, INDEXPath);
if (cp == NULL) break;
STRset(Filename, cp);
DocIDset = TRUE;
}
if (DocIDset == TRUE && Headlineset == TRUE) {
char tmppath[512];
sprintf(tmppath, "R%d-%d-%s", StartByte, EndByte, STRget(Filename));
GSsetType(gs, '0');
GSsetTitle(gs, STRget(Headline));
GSsetHost(gs, INDEXHost);
GSsetPort(gs, INDEXPort);
GSsetPath(gs, tmppath);
GDaddGS(gd, gs);
DocIDset = FALSE;
Headlineset = FALSE;
}
}
}
#endif /** WAISSEARCH **/