home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
ftp.barnyard.co.uk
/
2015.02.ftp.barnyard.co.uk.tar
/
ftp.barnyard.co.uk
/
cpm
/
walnut-creek-CDROM
/
CPM
/
PROGRAMS
/
WSTAR
/
INDEX.C
< prev
next >
Wrap
C/C++ Source or Header
|
2000-06-30
|
13KB
|
462 lines
/********************************************************
* *
* INDEX *
* Text file index generator. *
* *
* T. Jennings 7/21/81 *
* 221 W. Springfield St. *
* Boston MA 02118 *
* *
* *
*********************************************************
INDEX Generates an ASCII WordStar (or equiv.) compatible
index from a text file. Words to be indexed are marked
with a control character. Entire phrases can be indexed
as well, by marking both ends with a different character.
The index generated will be sorted alphabetically, with
the first character of all entries in caps. Each entry
will contain the page number as well.
Two control characters, ^K and ^P are used. ^K marks
single words, and ^P marks phrases. Phrases too long
will be truncated to fit.
Examples:
The sixth word in this ^Ksentence will be put in the index.
^PThis entire phrase^P will be indexed.
The index for these two items, once printed, would look
like:
Sentence .................................... 2
This entire phrase .......................... 3
See INDEX.DOC for details
*/
#include <bdscio.h>
#define WORD_MARK 0x0b /* ^K: marks a single word to index */
#define PHRASE_MARK 0x10 /* ^P: marks each end of a phrase */
#define CONTROLZ 0x1a /* end of file; also marks the top of the pile */
#define CR 0x0d /* useful ASCII characters */
#define LF 0x0a
#define WORD_LEN 39 /* maximum word or phrase size */
#define LAST_COL 40 /* column to start page # */
#define PAGE_LEN 66 /* default lines per page */
#define DEF_LMARGIN 8 /* default left margin, */
#define DEF_RMARGIN 72 /* ... and right margin */
/* Storage sizes. NAME_LEN must hold the longest CP/M style name handled
   here, "d:filename.ext" (14 characters), plus its terminating null. The
   old size of 12 was too small even for "filename.ext", so strcpy() and
   add_ext() wrote past the end of the name buffers. */
#define NAME_LEN 16
#define PILE_SIZE 32768 /* bytes of entry text and page numbers */
#define MAX_ENTRIES 1024 /* slots in pointers[] */
char c; /* current character; used by index() */
int char_count; /* # characters looked at in file, */
int word_count; /* # words looked at */
int line_count; /* ditto lines */
int this_line; /* current line #/page */
int entries; /* # entries in index */
int this_page; /* current page */
int page_size; /* current max page length */
int lmargin,rmargin; /* current left and right margins */
int found_index; /* true if old index found */
char inbuf[BUFSIZ]; /* text input buffer */
char outbuf[BUFSIZ]; /* text output buffer */
char inname[NAME_LEN]; /* where we save ASCII filenames */
char outname[NAME_LEN];
char tempname[NAME_LEN];
char pile[PILE_SIZE]; /* kludge */ /* Awaiting dynamic storage */
char *i; /* next free byte in pile; used by index() */
char *pointers[MAX_ENTRIES]; /* pile pointers for sorting */
/* Placeholder for starting a new page. The body is empty, and nothing in
   this file as shown calls it. */
change_page() /* dummy function to start a new */
{} /* page here. */
/* System interface for INDEX. Make sure theres at least one argument (the
filename). Open it for reading, (error check) make a temporary file for
output (filename.$I$) (error check). Then...
1/ Read the file sequentially, looking for marked words, [ index() ]
2/ Cleanup the pile of entries (remove leading blanks, convert each
1st character to uper case) [ cleanup() ]
3/ Sort the pile alphabeticlly [ sort() ]
4/ Dump the pile to the disk, expanding each to correct width,
and removing duplicate entries [ dump() ]
5/ Return to CP/M
*/
main(argc,argv)
int argc;
char *argv[];
{
printf ("\nINDEX-- Text file Index generator (c) T. Jennings 7/21/81");
page_size =PAGE_LEN;
lmargin =DEF_LMARGIN;
rmargin =DEF_RMARGIN;
found_index =FALSE;
if (--argc >0)
{ strcpy (inname,argv[1]); /* save the names, */
strcpy (outname,argv[1]);
if (fopen(inname,inbuf) ==ERROR) /* try to open source file,*/
{ printf ("\nCan't open %s",inname);
exit();
}
add_ext (outname,"$I$"); /* make the output file, */
if (fcreat(outname,outbuf) ==ERROR) /* reuse and destroy name*/
{ printf ("\nCan't create temporary file %s",outname);
exit();
}
}
else
{ printf ("\nSpecify a file to make an index from");
exit();
}
printf ("\nAdding an index to %s (removing any old index first)",inname);
index(); /* scan the file, */
cleanup(); /* convert each 1st char to upper case */
printf ("\nSorting, ");
sort(); /* sort it, */
printf ("saving it, ");
dump(); /* write it to the disk, */
printf ("cleaning up, ");
strcpy (tempname,inname);/* delete any .BAK */
add_ext (tempname,"BAK");
unlink (tempname);
rename (inname,tempname);/* rename original to .BAK, */
rename (outname,inname); /* rename new to original */
printf ("done.\n");
exit(); /* exit. */
}
/* Read the input file one line at a time (fill_line() also copies every
character it reads to the output file) and collect the marked words and
phrases in PILE[]. Maintains the global character, word, line and page
counters and the entry count.

Each finished entry is stored in PILE[] as two consecutive null terminated
strings: the entry text, then the page number formatted with "%2d". After
the last entry a single control-z marks the top of the pile. POINTERS[]
holds the start of each entry in the order found; the slot after the last
entry points to the control-z.

A word entry (^K) ends at the next blank; a phrase entry (^P) ends at the
next ^P. Characters below ' ' and the character '.' are never stored, and
a blank is not stored directly after another stored blank.

Reading stops at end of file, or as soon as scan_line() reports the
"..index" line of an old index.

KLUDGE: No limit check is done on the size of the pile, nor on the number
of POINTERS used. */
index()
{
int inword; /* true while inside a run of non-blank characters */
int gotword; /* true if saving this word */
int gotphrase; /* true if saving this phrase */
int entry_len; /* characters examined so far for this entry */
char linebuf[132]; /* character line buffer */
int j; /* read position in linebuf */
int last_char_blank; /* suppress mult. spaces 'tween lines */
int k; /* next free slot in pointers[] */
inword =FALSE; /* no word yet, */
gotword =FALSE; /* no marked word found, */
gotphrase =FALSE; /* no marked phrase found, */
last_char_blank =FALSE; /* too early... */
i =pile; /* set the pointers, ... */
k=0;
char_count =0;
word_count =0; /* and our counters */
line_count =0;
entries =0;
this_page =1;
this_line =1;
while (fill_line(linebuf) !=CONTROLZ) /* while not EOF, */
{ if (scan_line(linebuf) ==TRUE) /* look for dot cmds */
break; /* "..index" seen: stop reading the input */
j=0;
++line_count; /* count total lines,*/
if (this_line++ >= page_size) /* current line, */
{ this_line =1;
++this_page;
}
while ((c =linebuf[j++]) !=0x00) /* while not end/line*/
{ ++char_count;
if (c== ' ')
{ inword =FALSE; /* end of a word */
if (gotword) /* if we were looking*/
{ ++i; /* step over the text's null */
sprintf(i,"%2d",this_page); /* page number follows the text */
while (*i++); /* step past the page number's null */
++entries; /* count another */
gotword =FALSE; /* done with word */
}
}
else if (inword ==FALSE) /* non-white char */
{ inword =TRUE;
++word_count;
}
if (c==WORD_MARK) /* new word to save */
{ gotword =TRUE; /* start saving next char */
entry_len =0;
pointers[k++] =i;/* set the pointer */
}
else if (c== PHRASE_MARK)
{ if (gotphrase) /* second mark: the phrase ends here */
{ ++i; /* step over the text's null */
sprintf(i,"%2d",this_page);
while (*i++); /* step past the page number's null */
++entries; /* count another */
gotphrase =FALSE;
}
else /* new phrase */
{ gotphrase =TRUE; /* else start now. */
entry_len =0; /* just starting */
pointers[k++] =i;/* point to it */
}
}
/* see if we should store a character. Note that entry_len is incremented
for every character tested while an entry is being saved, including
characters that the later tests then reject. */
else if ( (gotphrase || gotword) &&
(entry_len++ <WORD_LEN) &&
(c >=' ') && (c !='.') &&
!(last_char_blank && c==' ') )
{ *i++ =c; /* we're saving now */
*i =0x00; /* null terminate it always, */
last_char_blank= (c==' '? TRUE : FALSE);
}
}
}
*i =CONTROLZ; /* mark the top of the pile, */
pointers[k++] =i; /* set its pointer */
printf ("\n Put %d words in the index ",entries);
printf ("out of a total of %d words.",word_count);
return;
}
/* Sort the index entries into ascending alphabetic order, ignoring case.

   Only POINTERS[] is rearranged; the text in the pile stays where it is.
   The list ends at the pointer whose target is a control-z. Entries are
   compared with comp(), which looks at the entry text only, and adjacent
   entries are exchanged only when strictly out of order, so entries with
   equal text keep their original relative order.

   Very simple interchange sort: passes are repeated until one makes no
   exchange.

   An empty index (first pointer already at the control-z) returns at once.
   Without that check the loop would start by looking at pointers[1], which
   index() never sets when there are no entries. */
sort()
{
	int first,last;
	char *temp;
	int changing;

	if (*pointers[0] ==CONTROLZ)		/* nothing to sort */
		return;

	do
	{	changing =FALSE;
		first =0;
		last =1;
		while (*pointers[last] !=CONTROLZ)
		{	if (comp (pointers[first],pointers[last]) >0)
			{	temp =pointers[first];
				pointers[first] =pointers[last];
				pointers[last] =temp;
				changing =TRUE;
			}
			++first;
			++last;
		}
	}
	while (changing);			/* until a do-nothing pass */
}
/* Case-insensitive comparison of two null terminated strings.

   Returns 0 when the strings are equal, otherwise the difference between
   the upper-cased characters at the first position where they differ:
   greater than 0 if FIRST sorts after LAST, less than 0 if before. */
int comp(first,last)
char *first;
char *last;
{
	int a,b;

	for (;;)
	{	a =toupper(*first);
		b =toupper(*last);
		if (a !=b)			/* mismatch decides the order */
			return (a -b);
		if (*first ==0x00)		/* both ended together: equal */
			return (0);
		++first;
		++last;
	}
}
/* Case-insensitive equality test on the first LENGTH characters of two
   strings. Returns 0 if they all match, 1 at the first mismatch. A null
   is compared like any other character; it does not end the test. */
int compl(length,first,last)
int length;
char *first,*last;
{
	while (length-- >0)
	{	if (toupper(*first) !=toupper(*last))
			return (1);
		++first;
		++last;
	}
	return (0);
}
/* Append the sorted index to the output file and close it.

   Unless an old "..index" line was found in the text, a ".pa" line and a
   "..index" line are written first. Each entry then becomes one line:
   LMARGIN blanks, the entry text, dots out to column LAST_COL, a blank and
   the page number.

   Entries that follow with the same text (per comp()) are merged: their
   first byte is overwritten with 0xff so the main loop skips them later,
   and their page numbers are added to the current line after commas. At
   most six are merged per line; any further duplicate starts a line of its
   own. A control-z is written after the last line. */
dump()
{
	int next;			/* index of the next pointer to look at */
	int dups;			/* duplicates merged into this line */
	int col;			/* characters of entry text written */
	int pad;
	char *entry;			/* text of the entry being written */
	char *p;

	if (found_index ==FALSE)	/* don't repeat an existing heading */
	{	sendstr ("\015\012.pa");
		sendstr ("\015\012..index\015\012");
	}

	next =0;
	while (*pointers[next] !=CONTROLZ)
	{	entry =pointers[next];
		++next;
		if (*entry ==0xff)		/* already merged into a line */
			continue;

		for (pad =0; pad <lmargin; ++pad)
			putc (' ',outbuf);

		col =0;
		for (p =entry; *p; ++p)		/* the entry text */
		{	putc (*p,outbuf);
			++col;
		}
		++p;				/* step over the null */

		for (; col <LAST_COL; ++col)	/* dot leader */
			putc ('.',outbuf);
		putc (' ',outbuf);

		for (; *p; ++p)			/* the page number */
			putc (*p,outbuf);

		dups =0;
		if (*pointers[next] !=CONTROLZ)
		{	while ((comp(entry,pointers[next +dups]) ==0) && (dups <6))
			{	p =pointers[next +dups];
				++dups;
				*p =0xff;	/* mark it removed */
				while (*p)	/* skip its text, */
					++p;
				++p;		/* and the null */
				putc (',',outbuf);
				for (; *p; ++p)	/* its page number */
					putc (*p,outbuf);
			}
		}

		putc (CR,outbuf);
		putc (LF,outbuf);
	}

	putc (CONTROLZ,outbuf);
	fflush (outbuf);
	fclose (outbuf);
}
/* Fill a line buffer with characters. Convert all white_space characters
to a single blank, put a null at the end. Copy each character to the output
file. */
fill_line(buffer)
char buffer[];
{
int i;
char c;
char d;
int temp;
int inblank; /* true to suppress blanks */
inblank =FALSE;
i=0;
do
{ temp =getc(inbuf);
if (temp ==ERROR)
return (CONTROLZ) /* check physical end */
;
d =temp; /* type conversion */
if (d ==CONTROLZ) /* dont send the control-z */
return (d)
;
putc (d,outbuf);
c =d&0x7f;
if (c== ' ' || c== ',' || c== '\t' || c==CR || c==LF
|| c=='!' || c==';' || c==':' )
c=' '
;
if ((c !=' ') || (inblank==FALSE))
{ buffer[i++] =c;
buffer[i] =0x00;
}
inblank =(c ==' ' ? TRUE : FALSE);
d &=0x7f;
}
while ((d !=LF) && (d !=CONTROLZ) && (i <132))
;
return(d);
}
/* Look for a dot command at the start of a line (after leading blanks).

   Any line starting with ".pa" (only three characters are compared,
   ignoring case) advances this_page and resets this_line to 0; the line
   itself is left as it is and FALSE is returned.

   A line starting with "..index" marks an old index: found_index is set so
   dump() does not write the heading again, the line is made to look empty
   from its first non-blank character on, and TRUE is returned.

   Everything else returns FALSE with nothing changed. */
int scan_line(buffer)
char *buffer;
{
	for (; *buffer ==' '; ++buffer)		/* skip leading blanks */
		;

	if (*buffer !='.')			/* not a dot command */
		return (FALSE);

	if (compl (3,buffer,".pa") ==0)		/* page break */
	{	++this_page;
		this_line =0;
		return (FALSE);
	}

	if (compl (7,buffer,"..index") !=0)	/* some other command */
		return (FALSE);

	*buffer =0x00;
	found_index =TRUE;
	return (TRUE);
}
/* Tidy every entry in the pile before sorting: convert its first character
   to upper case, and drop one leading blank by advancing the entry's
   pointer, but only if the entry is more than one character long.

   The length test has to look at the second character of the current
   entry, *(pointers[i] +1). It used to read *pointers[i+1], which is the
   first character of the NEXT entry, so a lone blank was stripped down to
   an empty string whenever the following entry was not empty. */
cleanup()
{
	int i;
	char c;

	i =0;
	while ((c =toupper(*pointers[i])) !=CONTROLZ)
	{	if ((*pointers[i] ==' ') && (*(pointers[i] +1) !=0x00))
		{	++(pointers[i]);		/* skip the blank, */
			c =toupper(*pointers[i]);	/* capitalize what follows */
		}
		*pointers[i++] =c;
	}
}
/* Replace the extension of FILENAME, in place, with EXT.

   A leading drive prefix ("d:") is skipped first, so it no longer counts
   against the name length; before, "B:CHAPTER1.TXT" came out as
   "B:CHAPTER.$I$". The name proper ends at the first dot, at the null, or
   after 8 characters, whichever comes first. A dot and up to the first 3
   characters of EXT are stored there, followed by a null.

   FILENAME must have room for the result: up to 2 (drive) + 8 + 1 + 3
   characters plus the null. No value is returned. */
int add_ext(filename,ext)
char *filename;
char *ext;
{
	int i;

	if ((filename[0] !=0x00) && (filename[1] ==':'))
		filename +=2;			/* step over the drive */

	for (i =0; i <8; i++)			/* find the dot, or 8 chars */
	{	if ((*filename =='.') || (*filename ==0x00))
			break;
		filename++;
	}

	*filename++ ='.';			/* add a dot, */
	for (i =0; (i <3) && (*ext !=0x00); i++)
		*filename++ =*ext++;		/* the new extension, */
	*filename =0x00;			/* and the terminator */
}
/* Write a null terminated string to the output file, one character at a
   time. The null itself is not written. */
sendstr(string)
char *string;
{
	char *p;

	for (p =string; *p !=0x00; ++p)
		putc (*p,outbuf);
}