home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
PC World Komputer 1998 May
/
Pcwk5b98.iso
/
Borland
/
Cplus45
/
BC45
/
XREF.PAK
/
XREF.CPP
< prev
next >
Wrap
C/C++ Source or Header
|
1995-08-29
|
15KB
|
408 lines
/*------------------------------------------------------------------------*/
/* */
/* XREF.CPP */
/* */
/* Copyright (c) 1993 Borland International */
/* All Rights Reserved. */
/* */
/* Text cross referencing example */
/* */
/*------------------------------------------------------------------------*/
/*------------------------------------------------------------------------*/
/* */
/* Containers Used: TBinarySearchTreeImp, TIBinarySearchTreeImp, */
/* TSVectorImp. */
/* Other Classes: string. */
/* */
/* Data files provided: trivial.dat and text.dat. */
/*------------------------------------------------------------------------*/
#include <classlib\binimp.h>
#include <classlib\vectimp.h>
#include <cstring.h>
#include <regexp.h>
#include <fstream.h>
#include <strstrea.h>
#include <iomanip.h>
#include <dir.h>
// Upper bound on the number of line references stored for one word.
// A word that turns up again once its record already holds this many
// references is rejected (removed from the cross reference table).
//
const unsigned int MaxRefs = 6U;

// Shortest word that is still a candidate for the cross reference
// table; anything with fewer characters is rejected.
//
const unsigned int MinWordLength = 3U;
/*------------------------------------------------------------------------*/
/* */
/* class TRecord */
/* */
/* Keeps track of the lines that a word appears on. */
/* */
/* Public Interface: */
/* */
/* TRecord( const string& word, unsigned lineNum ); */
/* */
/* Creates record with given word and line number. */
/* */
/* int operator == ( const TRecord& record ) const; */
/* */
/* Returns 1 if the Words compare equal, 0 otherwise. */
/* */
/* int operator < ( const TRecord& record ) const; */
/* */
/* Returns 1 if the Word in this record is lexically before the */
/* Word in the record parameter. */
/* */
/* int AddLineRef( unsigned lineNum ); */
/* */
/* Add given line number to word's list of line references. If */
/* the number of references >= 'MaxRefs' then return 0 and */
/* do not enter given line number in line ref list, else */
/* return 1 */
/* */
/* string GetWord() const; */
/* */
/* Return copy of 'Word' member. */
/* */
/* Global functions: */
/* */
/* ostream& operator << ( ostream&, const TRecord& ); */
/* */
/* Writes the record to the ostream. */
/* */
/*------------------------------------------------------------------------*/
class TRecord
{
public:

    // Creates a record for 'word' with 'lineNum' as its first reference.
    TRecord( const string& word, unsigned lineNum );

    // Accessors.
    string GetWord() const;             // copy of the stored word
    unsigned GetPageCount() const;      // number of stored line references

    // Adds a line reference; returns 1 on success, 0 if the list is full.
    int AddLineRef( unsigned lineNum );

    // Records are compared through their Word member only.
    int operator == ( const TRecord& record ) const;
    int operator <  ( const TRecord& record ) const;

    // Writes the word followed by its comma-separated line numbers.
    friend ostream& operator << ( ostream&, const TRecord& );

private:

    string Word;                        // the cross-referenced word
    TSVectorImp<unsigned> LineRef;      // line numbers the word appears on

};
//
// Build a record for 'word'. LineRef is constructed with MaxRefs as
// its size argument and 'lineNum' is entered as the first reference.
//
TRecord::TRecord( const string& word, unsigned lineNum )
    : Word( word ), LineRef( MaxRefs )
{
    AddLineRef( lineNum );
}
//
// Two records are equal when their words compare equal; the line
// reference lists play no part in the comparison.
//
inline int TRecord::operator == ( const TRecord& record ) const
{
    const string& otherWord = record.Word;
    return Word == otherWord;
}
//
// Orders records by their words alone, using string's own '<'.
//
inline int TRecord::operator < ( const TRecord& record ) const
{
    const string& otherWord = record.Word;
    return Word < otherWord;
}
//
// Enter 'lineNum' in the list of line references.
//
// Returns 0, leaving the list untouched, when the list already holds
// MaxRefs entries. Otherwise returns 1; the number is stored only if
// Find() reports UINT_MAX for it, so a line is never listed twice.
//
int TRecord::AddLineRef( unsigned lineNum )
{
    // No room left: refuse the reference.
    if( LineRef.Count() >= MaxRefs )
        return 0;

    const int alreadyListed = ( LineRef.Find( lineNum ) != UINT_MAX );
    if( !alreadyListed )
        LineRef.Add( lineNum );

    return 1;
}
//
// Hand back a copy of the word this record describes.
//
string TRecord::GetWord() const
{
    string copyOfWord( Word );
    return copyOfWord;
}
//
// Number of line references currently stored for the word.
//
unsigned TRecord::GetPageCount() const
{
    const unsigned storedRefs = LineRef.Count();
    return storedRefs;
}
//
// Write a record to 'os' as the word, left-justified in a field of 20
// characters, followed by its line numbers separated by commas.
//
// The text is first formatted into a private stream and then sent to
// 'os' as a single string, so the formatting flags of 'os' itself are
// not modified here.
//
// Returns 'os'.
//
ostream& operator << ( ostream& os, const TRecord& rec )
{
    // A dynamic ostrstream grows as needed. A fixed-size buffer would
    // be left without its terminating null when a long word filled it,
    // and writing that buffer to 'os' would then run past its end.
    ostrstream line;
    line.setf( ios::left );
    line << setw(20) << rec.Word;

    // Emit the separator before every number except the first. This
    // needs no special case for the last element, does not use the
    // loop index outside the loop, and is safe for an empty list.
    const unsigned refCount = rec.LineRef.Count();
    for( unsigned i = 0; i < refCount; i++ )
    {
        if( i != 0 )
            line << ',';
        line << rec.LineRef[i];
    }
    line << ends;

    os << line.str();

    // str() freezes the buffer; unfreeze it so that the ostrstream
    // destructor releases the storage.
    line.rdbuf()->freeze(0);
    return os;
}
/*------------------------------------------------------------------------*/
/* */
/* class TCrossRef */
/* */
/* Responsible for reading a trivial word file and text file, */
/* then creating and printing a cross reference list of the words */
/* in the text file. The trivial word file contains words that will */
/* not be included in the cross reference. The text file consists */
/* of ordinary ASCII text. The output is an alphabetical list of */
/* words in the text file and the numbers of the lines on which they */
/* occur. */
/* */
/* Public Interface: */
/* */
/* TCrossRef( string trivialWordFileName, string textFileName ); */
/* */
/* Read trivial word file and store in a binary search tree. */
/* Then read each word in from the text file and process it. */
/* If either file cannot be opened or an error occurs during */
/* reading a TXFileReadError exception is thrown. */
/* */
/* ~TCrossRef(); */
/* */
/* Flushes the list of words in the cross reference. */
/* */
/* PrintCrossReferences( ostream& ); */
/* */
/* Print each word and the lines it appears on in the text file. */
/* */
/* Nested class TXFileReadError. */
/* */
/* An object of type TXFileReadError is thrown if either the */
/* trivial word or cross reference file could not be opened or */
/* read. To find out which file it was use the WhichFile() */
/* member function. */
/* */
/*------------------------------------------------------------------------*/
class TCrossRef
{
public:

    // Exception thrown for a file that could not be used. The xmsg
    // text is "Error reading file: " followed by the file name, and
    // the name alone is kept for WhichFile().
    class TXFileReadError : public xmsg
    {
    public:
        TXFileReadError( const string& file )
            : xmsg( string( "Error reading file: " ) + file ), File(file) {}
        string WhichFile() const;
    private:
        string File;
    };

    // Reads both files and builds the cross reference.
    TCrossRef( string trivialWordFileName, string textFileName );
    ~TCrossRef();

    // Writes the finished cross reference to the given stream.
    void PrintCrossReferences( ostream& );

private:

    // File readers used by the constructor.
    void ReadTrivialWords( string trivialWordFileName );
    void ReadWords( string bookWordFileName );

    // Per-word handling.
    void ProcessWord( string word, unsigned page );
    void Reject( string word, string reason );

    // Callback handed to ForEach() by PrintCrossReferences().
    static void PrintRecord( TRecord _BIDSFAR &o, void _BIDSFAR*arg );

    TBinarySearchTreeImp<string> RejectedWords;     // words kept out of the index
    TIBinarySearchTreeImp<TRecord> IndexOfWords;    // the cross reference itself
    unsigned CurrentPage;                           // set to 1 by the constructor

};
//
// Build the complete cross reference at construction time: first
// load the words in the file named by trivialWordFileName into the
// tree of rejected words, then read the text in the file named by
// textFileName and build the cross reference tree from it.
//
TCrossRef::TCrossRef( string trivialWordFileName, string textFileName )
    : CurrentPage( 1 )
{
    ReadTrivialWords( trivialWordFileName );
    ReadWords( textFileName );
}
//
// Release the cross reference: flush the index tree, passing 1 to
// Flush().
//
TCrossRef::~TCrossRef()
{
    IndexOfWords.Flush( 1 );
}
//
// Read the trivial words from the named file, convert each one to
// lower case, and store it in the rejected words tree.
//
// Throws TXFileReadError, carrying the file name, if the file cannot
// be opened or if the stream reports an unrecoverable error (bad())
// while it is being read.
//
void TCrossRef::ReadTrivialWords( string trivialWordFileName )
{
    ifstream TrivialWordFile( trivialWordFileName.c_str() );
    if( !TrivialWordFile )
        throw TXFileReadError( trivialWordFileName );

    string wordFromFile;
    while( TrivialWordFile >> wordFromFile )
    {
        wordFromFile.to_lower();
        RejectedWords.Add( wordFromFile );
    }

    // The loop ends at end of file as well as on failure. bad() picks
    // out a genuine I/O error, which must be reported rather than
    // treated as a short file.
    if( TrivialWordFile.bad() )
        throw TXFileReadError( trivialWordFileName );
}
//
// Read the named text file line by line and feed every word to
// ProcessWord() together with the number of the line it was found
// on (lines are counted from 1). Each whitespace-delimited token is
// converted to lower case and reduced to its first run of the
// letters a-z, which strips non-text characters and anything that
// follows them within the token. (Quick and dirty hack for dealing
// with possessives, etc.)
//
// Throws TXFileReadError, carrying the file name, if the file cannot
// be opened or if the stream reports an unrecoverable error (bad())
// while it is being read.
//
// NOTE(review): lines are read into a 128-byte buffer. How getline()
// treats a longer line depends on the iostream library in use, so
// very long lines may be split or may end the scan early - confirm
// against the library documentation.
//
void TCrossRef::ReadWords( string textFileName )
{
    ifstream XrefFile( textFileName.c_str() );
    if( !XrefFile )
        throw TXFileReadError( textFileName );

    TRegexp WordOnly( "[a-z]+" );
    unsigned currentLine = 1;
    char buf[128];

    while( XrefFile.getline( buf, sizeof(buf) ) )
    {
        // Tokenize the line just read.
        istrstream in( buf );
        string word;
        while( in >> word )
        {
            word.to_lower();        // store as lower case
            // use WordOnly as filter to strip out trailing
            // non-text characters and any characters following.
            ProcessWord( word(WordOnly), currentLine );
        }
        currentLine++;
    }

    // The loop ends at end of file as well as on failure. bad() picks
    // out a genuine I/O error, which must be reported.
    if( XrefFile.bad() )
    {
        // This function runs from the constructor, so the destructor
        // will not run if we throw; release the records built so far
        // the same way the destructor does.
        IndexOfWords.Flush( 1 );
        throw TXFileReadError( textFileName );
    }
}
//
// Decide whether 'word', seen on line 'lineNum', belongs in the
// cross reference tree. If not, explain why via Reject(). If so,
// add it.
//
//   - A word found in the rejected words tree is rejected.
//   - A word shorter than MinWordLength is rejected.
//   - A word not yet in the index gets a new record.
//   - A word whose record already holds MaxRefs references is
//     rejected and its record is removed from the index.
//   - Otherwise the line number is added to the existing record.
//
void TCrossRef::ProcessWord( string word, unsigned lineNum )
{
    if( RejectedWords.Find( word ) != 0 )
    {
        Reject( word, "found in rejected word file" );
        return;
    }

    if( word.length() < MinWordLength )
    {
        Reject( word, "too short" );
        return;
    }

    // Search key for the index. A named object is used because taking
    // the address of a temporary is not valid in standard C++.
    TRecord probe( word, lineNum );
    TRecord *recordToUpdate = IndexOfWords.Find( &probe );

    if( recordToUpdate == 0 )
    {
        // The word isn't in the tree yet. Add a
        // new record to contain it.
        IndexOfWords.Add( new TRecord( word, lineNum ) );
    }
    else if( recordToUpdate->GetPageCount() >= MaxRefs )
    {
        // Too many references: stop indexing this word and take its
        // record out of the tree (Detach() is passed 1).
        Reject( word, "reference count too large" );
        IndexOfWords.Detach( recordToUpdate, 1 );
    }
    else
    {
        // The word is already in the tree. Add the
        // additional line reference.
        recordToUpdate->AddLineRef( lineNum );
    }
}
//
// ForEach() callback: 'arg' carries the address of the ostream to
// print to; the record is written to it followed by endl.
//
void TCrossRef::PrintRecord( TRecord &o, void *arg )
{
    ostream *target = (ostream*)arg;
    *target << o << endl;
}
//
// Write the heading "References:" and then every record in the
// index to 'os', one per line. The stream is switched to left
// adjustment first, and that setting is left in place on return.
//
void TCrossRef::PrintCrossReferences( ostream& os )
{
    os << "References:\n\n";

    os.setf( ios::left, ios::adjustfield );

    // PrintRecord receives the stream through the void* argument.
    void *streamArg = &os;
    IndexOfWords.ForEach( PrintRecord, streamArg );
}
//
// Process a rejected word: make sure it is in the rejected words
// tree and print a message on cerr saying why it was rejected.
//
// The word is added only when it is not in the tree already. This
// function is also called for words that were rejected because they
// were found in that tree, and adding those again on every
// occurrence would fill the tree with copies of the same word.
//
void TCrossRef::Reject( string word, string reason )
{
    if( RejectedWords.Find( word ) == 0 )
        RejectedWords.Add( word );

    cerr << '\'' << word << "' rejected, " << reason << endl;
}
//
// Name of the file this exception was constructed with.
//
string TCrossRef::TXFileReadError::WhichFile() const
{
    string fileName( File );
    return fileName;
}
/*------------------------------------------------------------------------*/
//
// Prompt (on cerr) for the two file names, build the cross reference
// and print it on cout. Returns 0 on success; if a TXFileReadError
// is thrown, its message is printed on cout and 1 is returned.
//
int main()
{
    int exitCode = 0;

    try
    {
        string TrivialWordFile;
        cerr << "Enter name of trivial word file: ";
        cin >> TrivialWordFile;

        string XrefFile;
        cerr << "Enter name of file to cross reference: ";
        cin >> XrefFile;

        TCrossRef CrossRef( TrivialWordFile, XrefFile );
        CrossRef.PrintCrossReferences( cout );
    }
    catch( const TCrossRef::TXFileReadError& xReadError )
    {
        cout << xReadError.why() << endl;
        exitCode = 1;
    }

    return exitCode;
}