home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
OS/2 Shareware BBS: 5 Edit
/
05-Edit.zip
/
word2x0a.zip
/
source
/
reader.cc
< prev
next >
Wrap
C/C++ Source or Header
|
1998-04-20
|
26KB
|
1,297 lines
/* $Id: reader.cc,v 1.27 1997/04/17 20:24:41 dps Exp $ */
/* Reads the word document */
#ifdef __GNUC__
#define alloca __builtin_alloca
#else
#if HAVE_ALLOCA_H
#include <alloca.h>
#else /* Do not have alloca.h */
#ifdef _AIX
#pragma alloca
#else /* not _AIX */
char *alloca();
#endif /* _AIX */
#endif /* HAVE_ALLOCA_H */
#endif /* __GNUC__ */
#include <iostream.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include "word6.h"
#include "interface.h"
#include "reader.h"
#define RESUME_CHARS 30
#define ST_MIN_ZEROS 16
#define ST_ASC_VCHARS 6
/* Seek start */
int chunk_reader::seek_start(FILE *f)
{
int nz,i,c;
rewind(f);
nz=0;
while ((c=fgetc(in))!=EOF)
{
if (c==0)
{
nz++;
continue;
}
if (nz<ST_MIN_ZEROS)
{
nz=0;
continue;
}
/* Deliberately ignore the first non-zero character */
nz=0;
for (i=0; i<ST_ASC_VCHARS; i++)
{
if ((c=fgetc(in))==EOF)
break;
if (c<' ' || c>126)
break;
}
if (i==ST_ASC_VCHARS)
{
fseek(f, -ST_ASC_VCHARS-1, SEEK_CUR);
return 1;
}
}
return 0;
}
/* Hanlde ^M^G---I think this is ^G */
static int read_char(FILE *in)
{
int c, d;
c=fgetc(in);
if (c!=PAR_END)
return c;
d=fgetc(in);
if (d==TABLE_SEP)
return d;
ungetc(d, in);
return c;
}
/* This code is basically a layered filtration process. At the bottom layer
is this next function that reads a character from a word document. Pointers
to object are used extensively to avoid implicit copies. */
static int read_character(FILE *in)
{
int c, d, asc;
static int s_ch=-42; // Assume EOF not -42
re_feed:
if (s_ch!=-42)
{
c=s_ch;
s_ch=-42;
}
else
c=read_char(in);
switch(c)
{
case 0:
/* This is a hack to skip junk---seems to work quite well */
asc=0;
while (asc<RESUME_CHARS)
{
if ((c=fgetc(in))==EOF)
goto re_feed;
if (c>=' ' && c<127)
asc++;
else
asc=0;
}
fseek(in, -RESUME_CHARS, SEEK_CUR);
return CH_SUSPECT;
case PAR_END:
return (CH_PAR | CONTROL_FLAG);
case TABLE_SEP:
d=read_char(in);
if (d!=c)
{
s_ch=d; /* Push back character */
return (CH_FIELD | CONTROL_FLAG);
}
return (CH_ROW | CONTROL_FLAG);
case START_CTL:
return (CH_SPEC | CONTROL_FLAG);
case END_CTL:
return (CH_ENDSPEC | CONTROL_FLAG);
case HARD_RETURN:
return (CH_HDRTN | CONTROL_FLAG);
case NEW_PAGE:
return (CH_PAGE | CONTROL_FLAG);
case FOOTNOTE:
return (CH_FOOTNOTE | CONTROL_FLAG);
default:
if (c<' ' && c!=EOF)
return (CH_OTHER | CONTROL_FLAG);
else
return c;
}
}
/* This function reads a paragraph, field of a table or whatever. It copies
everything in any embed tags unprocessed and leaves it in the element */
void chunk_reader::read_chunk_raw(void)
{
int c, is_ctl=0;
text.zero(); // Zero text buffer
while ((c=read_character(in))!=(EOF | CONTROL_FLAG))
{
if (c & CONTROL_FLAG)
{
c &= ~CONTROL_FLAG;
/* If in embedded item then ignore all but end embed */
if (is_ctl)
{
if (c==CH_ENDSPEC)
{
is_ctl=0;
text.add(c);
}
continue;
}
switch(c)
{
case CH_PAR:
case CH_FIELD:
case CH_ROW:
break;
case CH_HDRTN:
text.add('\n'); // Add newline
continue; // Continue processing
case CH_OTHER:
continue; // Just ignore character
case CH_SPEC:
text.add(c);
if (!is_ctl)
is_ctl=1;
continue;
case CH_ENDSPEC:
cerr<<"Suprious ^U ignored\n";
continue;
case CH_PAGE:
case CH_FOOTNOTE:
text.add(c);
continue;
default:
cerr<<"Unexpected value "<<(c & (~CONTROL_FLAG))\
<<" switch\n";
continue;
}
type=c;
tptr=text;
return;
}
/* Not control or end of inclusion */
text.add(c);
}
type=CH_EOF;
tptr=text;
return;
}
/* This function reads chunks from using read_chunk_raw and hands them
out in contigous peices of the same type. Emebedded stuff gets
seperated from the rest here. The partial flag is set if only some
of a field is returned (usually because of an embedded item).
*/
struct chunk_rtn chunk_reader::read_chunk(void)
{
const char *s; // Save stupid compilers
struct chunk_rtn res;
if (tptr==NULL)
this->read_chunk_raw();
s=tptr;
/* Embed */
if (*s==CH_SPEC)
{
while (*(++s))
{
if (*s==CH_ENDSPEC)
break;
res.txt.add(*s);
}
tptr=s+1;
res.type=CH_SPEC;
return res;
}
/* New page */
if (*s==CH_PAGE)
{
res.type=CH_PAGE;
tptr=s+1;
return res;
}
if (*s==CH_FOOTNOTE)
{
res.type=CH_FOOTNOTE;
tptr=s+1;
return res;
}
/* Normal */
while (*s)
{
if (*s==CH_SPEC || *s==CH_PAGE || *s==CH_FOOTNOTE)
{
tptr=s;
res.type=(PART_FLAG | type);
return res;
}
res.txt.add(*s);
s++;
}
res.type=type;
tptr=NULL;
text.zero(); // Save memory
return res;
}
/*----------------------------------------------------------------------*/
/* Tables and basic stuff */
/*
* Refill the token queue.
*/
int tok_seq::rd_token(void)
{
struct chunk_rtn r;
tok *t;
char other[2];
r=read_chunk();
if (r.type==CH_EOF)
return 0;
switch(r.type & ~PART_FLAG)
{
case CH_ROW:
if (table==NULL)
table=new(table_info);
/* Handle 1 field rows properly */
if (table->col==0)
{
t=new(tok)(T_ROW, (void *) NULL, tok::TOK_START);
table->enqueue(t);
}
table->col++;
if (table->col>table->cols)
table->cols=table->col;
table->rows++;
table->tok_push(T_FIELD, &(r.txt));
t=new(tok)(T_ROW, (void *) NULL, tok::TOK_END);
table->enqueue(t);
table->col=0;
break;
case CH_FIELD:
if (table==NULL)
{
table=new(table_info);
}
if (table->col==0)
{
t=new(tok)(T_ROW, (void *) NULL, tok::TOK_START);
table->enqueue(t);
}
table->col++;
table->tok_push(T_FIELD, &(r.txt));
break;
case CH_PAR:
if (table!=NULL)
{
/* Table handling */
if (table->col!=0)
{
#if 0
table->tok_push(T_FIELD, &(r.txt));
t=new(tok)(T_ROW, (void *) NULL, tok::TOK_END);
table->enqueue(t);
t=new(tok)(T_ROW, (void *) NULL, tok::TOK_START);
table->enqueue(t);
for (i=0; i<table->col; i++)
{
t=new(tok)(T_FIELD, "\0", tok::TOK_START);
table->enqueue(t);
t=new(tok)(T_FIELD, (void *) NULL, tok::TOK_END);
table->enqueue(t);
}
table->rows++;
break;
#else
t=new(tok)(T_ROW, (void *) NULL, tok::TOK_END);
table->enqueue(t);
if (table->col>table->cols)
table->cols=table->col;
table->rows++;
#endif
}
table->finish(&output);
delete(table);
table=NULL;
}
if (r.type & PART_FLAG)
{
tok *td;
td=new(tok)(T_PARAGRAPH, (const char *) (r.txt), tok::TOK_START);
output.enqueue(td);
}
else
tok_push(T_PARAGRAPH, &(r.txt));
break;
case CH_SPEC:
tok_push(T_SPEC, &(r.txt));
break;
case CH_PAGE:
case CH_FOOTNOTE:
other[0]=r.type;
other[1]='\0';
t=new(tok)(T_CODE, other, tok::TOK_START);
output.enqueue(t);
break;
default:
break;
}
return 1;
}
/*----------------------------------------------------------------------*/
/* Equations.... */
/*
* Code that scans forward to the end of stuff that looks like an extension
* of some maths that was the last thing.
*/
const char *math_forward_scan(const char *s)
{
const char *scan, *end;
int blvl;
end=scan=s;
/* Check whether the first part looks like more of the equation */
while (1)
{
/* Skip spaces */
while (isspace(*scan))
scan++;
/* Look for binary operator */
if (*scan=='+' || *scan=='-' || *scan=='*' || *scan=='/' ||
*scan=='=')
{
/* skip spaces */
scan++;
while (isspace(*scan))
scan++;
/* Grab next word */
blvl=0;
while (!isspace(*scan) || blvl>0)
{
switch(*scan)
{
case '(':
blvl++;
break;
case ')':
blvl--;
break;
default:
break;
}
if (*scan=='\0')
break; // Robustness fix
scan++;
}
end=scan; // Update end
}
else
break; // No binary operator, assume no text
}
return end;
}
/*
* Code that scans backwards to the start of stuff that looks like it should
* ohave been prepended to the current maths.
*/
const char *math_reverse_scan(const char *s)
{
const char *scan, *start;
int blvl;
start=scan=s+strlen(s)-1;
/* Check whether the first part looks like more of the equation */
while (scan>=s)
{
/* Skip spaces */
while (scan>=s && isspace(*scan))
scan--;
if (scan<s)
return s;
/* Look for binary operator */
if (*scan=='+' || *scan=='-' || *scan=='*' || *scan=='/' ||
*scan=='=')
{
/* skip spaces */
scan--;
while (scan>=s && isspace(*scan))
scan--;
if (scan<s)
return s;
/* Grab next word */
blvl=0;
while (!isspace(*scan) || blvl>0 )
{
switch(*scan)
{
case ')':
blvl++;
break;
case '(':
blvl--;
break;
default:
break;
}
if (scan==s)
return s; // Robustness fix
scan--;
}
start=scan; // Update end
}
else
break; // No binary operator, assume no text
}
return start;
}
/*
* Code to feed a token one at a time. (private, need prostproccessing
* to compensate for equation abuse by word users)
*/
const tok_seq::tok *tok_seq::feed_token(void)
{
while (output.is_empty())
{
if (!rd_token())
return NULL;
}
return output.dequeue();
}
/* Private token reader, compensates for equation abuse */
const tok_seq::tok *tok_seq::math_collect(void)
{
const tok *rdt, *ntok, *nntok;
const char *mptr, *endptr;
char *s, *t;
math_aggregate: ;
if ((rdt=this->saved_tok)==NULL)
{
if ((rdt=this->feed_token())==NULL)
return NULL;
}
else
saved_tok=NULL;
switch (rdt->tok & (~PART_FLAG))
{
case T_PARAGRAPH:
if (rdt->end!=tok::TOK_START || (rdt->tok & PART_FLAG==0)
|| rdt->data.d==NULL)
break;
if ((ntok=this->feed_token())==NULL)
break;
/* Passed all the easy rejection cases, invoke math_reverse_scan */
saved_tok=ntok;
if (ntok->tok==T_SPEC && ntok->end==tok::TOK_START &&
ntok->data.d!=NULL && strncmp(ntok->data.d, "eq ", 3)==0)
{
mptr=math_reverse_scan(rdt->data.d);
endptr=rdt->data.d+strlen(rdt->data.d)-1;
if (mptr>=endptr)
break;
/* Allocate memory */
if ((s=(char *) malloc(mptr-rdt->data.d+1))==NULL)
{
cerr<<"Malloc read_token::malloc failure (fatal)\n";
exit(1);
}
if ((t=(char *) malloc(strlen(ntok->data.d)+endptr-mptr+1))==NULL)
{
free((void *) s);
cerr<<"Malloc read_token::malloc failure (fatal)\n";
exit(1);
}
/* Compute result strings */
memcpy(s, rdt->data.d, mptr-rdt->data.d);
*(s+(mptr-rdt->data.d))='\0';
memcpy(t, ntok->data.d, 3);
memcpy(t+3, mptr, endptr-mptr+1);
strcpy(t+3+(endptr-mptr)+1, ntok->data.d+3);
/* Replace original data */
free((void *) rdt->data.d);
((tok *) rdt)->data.d=s;
free((void *) ntok->data.d);
((tok *) ntok)->data.d=t;
}
break;
case T_SPEC:
if (rdt->end!=tok::TOK_START || rdt->data.d==NULL ||
strncmp(rdt->data.d, "eq ", 3)!=0)
break;
if ((nntok=this->feed_token())==NULL)
break; // this is the end of the SPEC.
if (nntok->tok!=T_SPEC || nntok->end!=tok::TOK_END)
{
cerr<<"Unexpected value of nntok: type "
<<nntok->tok<<" end "<<nntok->end<<"\n";
}
if ((ntok=this->feed_token())==NULL)
{
output.insert(nntok);
break;
}
/* Passed all the easy rejection cases, invoke math_forward_scan */
saved_tok=ntok;
if (ntok->tok==T_PARAGRAPH && ntok->end!=tok::TOK_END &&
ntok->data.d!=NULL)
{
mptr=math_forward_scan(ntok->data.d);
endptr=ntok->data.d+strlen(ntok->data.d);
if (mptr==ntok->data.d)
{
output.insert(ntok); // This comes out second
output.insert(nntok);
saved_tok=NULL;
break;
}
/* Allocate memory */
if (*mptr!='\0')
{
if ((s=(char *) malloc(endptr-mptr))==NULL)
{
cerr<<"Malloc read_token::malloc failure (fatal)\n";
exit(1);
}
memcpy(s, mptr, endptr-mptr);
*(s+(endptr-mptr))='\0';
}
else
s=NULL;
if ((t=(char *)
malloc(strlen(rdt->data.d)+mptr-ntok->data.d+1))==NULL)
{
if (s!=NULL)
free((void *) s);
cerr<<"Malloc read_token::malloc failure (fatal)\n";
exit(1);
}
endptr=rdt->data.d+strlen(rdt->data.d);
memcpy(t, rdt->data.d, endptr-rdt->data.d);
memcpy(t+(endptr-rdt->data.d), ntok->data.d, mptr-ntok->data.d);
*(t+(endptr-rdt->data.d)+(mptr-ntok->data.d))='\0';
/* Afjust result */
free((void *) rdt->data.d);
((tok *) rdt)->data.d=t;
if (*mptr=='\0')
{
/* If we consumed 100% continue seeking */
delete(ntok);
saved_tok=rdt;
output.insert(nntok); // Re-insert end of spec.
goto math_aggregate;
}
free((void *) ntok->data.d);
((tok *) ntok)->data.d=s;
/* Not all consumed, return result */
}
else if (ntok->tok==T_SPEC && ntok->end==tok::TOK_START &&
ntok->data.d!=NULL && strncmp(ntok->data.d, "eq ", 3)==0)
{
/* Combine consecutive eq's */
endptr=rdt->data.d+strlen(rdt->data.d);
if ((t=(char *)
malloc((endptr-rdt->data.d)+strlen(ntok->data.d)-2))==NULL)
{
cerr<<"Malloc read_token::malloc failure (fatal)\n";
exit(1);
}
memcpy(t, rdt->data.d, endptr-rdt->data.d);
strcpy(t+(endptr-rdt->data.d), ntok->data.d+3);
delete(nntok); // Reply on end of spec following this eq
delete(ntok); // Junk this eq
free((void *) rdt->data.d);
((tok *) rdt)->data.d=t;
saved_tok=rdt;
goto math_aggregate;
}
output.insert(ntok); // This comes out second
output.insert(nntok);
saved_tok=NULL;
break;
default:
break;
}
return rdt;
}
/* Private choke point between equations and lists token reader */
const tok_seq::tok *tok_seq::eqn_rd_token(void)
{
const tok *t, *n;
fifo<tok> *tf;
int tot, specs;
if ((t=this->math_collect())==NULL)
return NULL;
switch(t->tok)
{
case T_PARAGRAPH:
if (t->end!=tok::TOK_START)
return t;
/* Check for spec only paragraph */
tf=new(fifo<tok>);
n=t;
tot=0;
specs=0;
/*
* This loop counts the number of characters in paragraphs and other
* items untilt the end of the paragraph. Each item is dumped on tf
* and this is inserted onto the beginning of the output queue.
*/
while(1)
{
tf->enqueue(n);
if (n->tok==T_PARAGRAPH)
{
if (n->end==tok::TOK_END)
break;
if (n->data.d!=NULL)
tot+=strlen(n->data.d);
if (tot>DISPL_TRESHOLD)
break;
}
else
specs++;
if (n->tok!=T_SPEC && n->tok!=T_OTHER && n->tok!=T_PARAGRAPH)
{
tot+=DISPL_TRESHOLD;
break;
}
if ((n=this->math_collect())==NULL)
break;
}
/*
* If the total is small enough and there is one or more item that
* will make it through the filter. Since insert()ed things end up
* in reverse order we must first reverse the queue (this is the
* uncommon case, so it is OK if it costs a bit more).
*/
if (tot<DISPL_TRESHOLD && specs>0)
{
tf->rev();
while ((n=tf->dequeue())!=NULL)
{
if (n->tok!=T_PARAGRAPH)
output.insert(n);
else
delete(n);
}
}
else
{
output.ins_trans(tf);
}
delete(tf);
t=output.dequeue();
break;
default:
break;
}
return t;
}
/*----------------------------------------------------------------------*/
/* Now move on to lists.... */
/* Return NULL or a new list record */
struct tok_seq::list_info *tok_seq::list_type(const char *txt)
{
struct list_info *nl;
int i,n;
/* Determine initial number, if any */
if (!isdigit(txt[0]))
n=-1;
else
{
n=i=0;
for (n=0, i=0; isdigit(txt[i]); i++)
n=n*10+txt[i]-'0';
}
if (n==1)
{
nl=new(struct list_info);
nl->list_type=LIST_ENUMERATE;
nl->ldata.item_no=0;
nl->obj_cnt=0;
nl->text_cnt=0;
nl->last_item=new(fifo<tok_seq::tok>);
nl->items=0;
return nl;
}
/* a., b., c. */
if (txt[0]=='a')
{
i=(txt[1]=='.') ? 2 : 1;
if (isspace(txt[i]))
{
nl=new(struct list_info);
nl->list_type=LIST_ENUM_ALPHA;
nl->ldata.lbullet=txt[0]-1;
nl->obj_cnt=0;
nl->text_cnt=0;
nl->last_item=new(fifo<tok_seq::tok>);
nl->items=0;
return nl;
}
}
/* A., B., C. */
if (txt[0]=='A')
{
i=(txt[1]=='.') ? 2 : 1;
if (isspace(txt[i]))
{
nl=new(struct list_info);
nl->list_type=LIST_ENUM_ALPHA;
nl->ldata.lbullet=txt[0]-1;
nl->obj_cnt=0;
nl->text_cnt=0;
nl->last_item=new(fifo<tok_seq::tok>);
nl->items=0;
return nl;
}
}
/* At present we only know about one of bullet */
if (txt[0]==(char) BULLET_CODE)
{
nl=new(struct list_info);
nl->list_type=LIST_BULLET;
nl->ldata.lbullet=txt[0];
nl->obj_cnt=0;
nl->text_cnt=0;
nl->last_item=new(fifo<tok_seq::tok>);
nl->items=0;
return nl;
}
return NULL;
}
const char *tok_seq::l_type_name(const struct list_info *lp)
{
switch(lp->list_type)
{
case LIST_BULLET:
return "itemize";
/* Not reached */
case LIST_ENUMERATE:
return "enumerate";
/* Not reached */
case LIST_ENUM_ALPHA:
return "listalpha";
/* Not reached */
default:
return "programming error";
/* Not reached */
}
/* Not reached */
}
/* Dequeue a list and queue it is as paragraphs */
static void list_to_para(fifo<tok_seq::tok> *out, fifo<tok_seq::tok> *add)
{
tblock txt;
int was_item_st;
const tok_seq::tok *t;
was_item_st=0;
while(!add->is_empty())
{
t=add->dequeue();
switch(t->tok)
{
case T_LIST:
delete(t);
continue;
/* Not reached */
case T_ITEM:
if (t->end==tok_seq::tok::TOK_START)
{
txt.add(t->data.d);
txt.add(' ');
was_item_st=1;
}
delete(t);
continue;
/* not reached */
case T_PARAGRAPH:
if (t->end!=tok_seq::tok::TOK_START)
break;
if (!was_item_st)
break;
txt.add(t->data.d);
delete(t);
t=new(tok_seq::tok)(T_PARAGRAPH, (const char *) txt,
tok_seq::tok::TOK_START);
txt.zero();
was_item_st=0;
break;
default:
break;
}
out->enqueue(t);
}
}
/*
* This handles cues for lists and the like. if ( ) else if ()
* ... gets messy fast
*/
const char *tok_seq::list_check(const char *txt, list_info **lh)
{
struct list_info *lp, *nl;
char *s;
tok *t;
int i,n;
/* Determine initial number. This will not change */
if (!isdigit(txt[0]))
n=-1;
else
{
n=i=0;
for (n=0, i=0; isdigit(txt[i]); i++)
n=n*10+txt[i]-'0';
}
lp=*lh;
list_reconsider:
while (lp!=NULL)
{
*lh=lp; // Makes no change unless lp changed below
switch (lp->list_type)
{
case LIST_ENUMERATE:
if (n==lp->ldata.item_no+1)
{
if (txt[i]=='.')
i++;
while (isspace(txt[i]))
i++;
if ((s=(char *) alloca(i+1))==NULL)
{
fprintf(stderr,
"Warning: item label skipped due to lack"
" of memory\n");
}
else
{
memcpy(s, txt, i);
*(s+i)='\0';
}
if (lp->items!=0)
{
outqueue.transfer(lp->last_item);
t=new(tok)(T_ITEM, (void *) NULL, tok::TOK_END);
outqueue.enqueue(t);
}
t=new(tok)(T_ITEM, s, tok::TOK_START);
lp->last_item->enqueue(t);
t=new(tok)(T_PARAGRAPH, txt+i, tok::TOK_START);
lp->last_item->enqueue(t);
lp->ldata.item_no++;
lp->obj_cnt=0; // No not list objects after this one
lp->text_cnt=0;
lp->items++;
return NULL;
}
break;
case LIST_BULLET:
if (txt[0]==lp->ldata.lbullet)
{
for (i=0; (isspace(txt[i])); i++ ) ;
if ((s=(char *) alloca(2))==NULL)
{
fprintf(stderr,
"Warning: item label skipped due to lack"
" of memory\n");
}
else
{
*s=lp->ldata.lbullet;
*(s+1)='\0';
}
if (lp->items!=0)
{
outqueue.transfer(lp->last_item);
t=new(tok)(T_ITEM, (void *) NULL, tok::TOK_END);
outqueue.enqueue(t);
}
t=new(tok)(T_ITEM, s, tok::TOK_START);
lp->last_item->enqueue(t);
while (isspace(*(++txt)));
t=new(tok)(T_PARAGRAPH, txt, tok::TOK_START);
lp->last_item->enqueue(t);
lp->obj_cnt=0; // No not list objects after this one
lp->text_cnt=0;
lp->items++;
return NULL;
}
break;
case LIST_ENUM_ALPHA:
if (txt[0]==lp->ldata.lbullet+1)
{
lp->ldata.lbullet++;
if ((s=(char *) alloca(3))==NULL)
{
fprintf(stderr,
"Warning: item label skipped due to lack"
" of memory\n");
}
else
{
*s=lp->ldata.lbullet;
if (txt[1]=='.')
{
*(s+1)='.';
*(s+2)='\0';
}
else
*(s+1)='\0';
}
if (lp->items!=0)
{
outqueue.transfer(lp->last_item);
t=new(tok)(T_ITEM, (void *) NULL, tok::TOK_END);
outqueue.enqueue(t);
}
t=new(tok)(T_ITEM, s, tok::TOK_START);
lp->last_item->enqueue(t);
for (i=0; (!isspace(txt[i])); i++ ) ;
for ( ;(isspace(txt[i])); i++) ;
t=new(tok)(T_PARAGRAPH, txt+i, tok::TOK_START);
lp->last_item->enqueue(t);
lp->obj_cnt=0; // No not list objects after this one
lp->text_cnt=0;
lp->items++;
return NULL;
}
break;
default:
fprintf(stderr, "Popping invalid list type %d\n",
lp->ldata.item_no);
nl=lp->next_list;
free(lp);
continue;
}
/* Not the right thing */
if ((nl=list_type(txt))!=NULL)
{
if (lp!=NULL && !(lp->last_item->is_empty()))
outqueue.transfer(lp->last_item); // Output outstanding items
t=new(tok)(T_LIST, l_type_name(nl), tok::TOK_START);
nl->last_item->enqueue(t);
nl->next_list=lp;
lp=nl;
continue;
}
lp->obj_cnt++;
lp->text_cnt +=strlen(txt);
if (lp->obj_cnt>PAR_ITEM_SEP_LIMIT || lp->text_cnt>TEXT_ITEM_SEP_LIMIT)
{
/* If only one item, not a list */
if (lp->items<2)
{
recycled=new(fifo<tok_seq::tok>);
list_to_para(recycled, lp->last_item);
delete(lp->last_item);
nl=lp->next_list;
free(lp);
lp=nl;
*lh=lp;
if (lp!=NULL)
lp->last_item->enqueue(recycled->dequeue());
else
outqueue.enqueue(recycled->dequeue());
return NULL;
}
/* Copy the list item */
if (!(lp->last_item->is_empty()))
{
const tok *tf;
tf=lp->last_item->dequeue();
while (tf->tok!=T_PARAGRAPH || tf->end!=tok::TOK_END)
{
outqueue.enqueue(tf);
if (lp->last_item->is_empty())
goto lend_para_done;
tf=lp->last_item->dequeue();
}
outqueue.enqueue(tf);
lend_para_done: ;
}
/* Finish off the list */
t=new(tok)(T_ITEM, (void *) NULL, tok::TOK_END);
outqueue.enqueue(t);
t=new(tok)(T_LIST, l_type_name(lp), tok::TOK_END);
outqueue.enqueue(t);
nl=lp->next_list;
recycled=lp->last_item; // Recycle elements queued
t=new(tok)(T_PARAGRAPH, txt, tok::TOK_START);
recycled->enqueue(t);
free(lp);
lp=nl;
*lh=lp;
return NULL;
}
t=new(tok)(T_PARAGRAPH, txt, tok::TOK_START);
lp->last_item->enqueue(t);
return NULL;
}
/* lp==NULL if we get here */
if ((nl=list_type(txt))!=NULL)
{
nl->next_list=lp;
lp=nl;
t=new(tok)(T_LIST, l_type_name(nl), tok::TOK_START);
nl->last_item->enqueue(t);
goto list_reconsider;
}
return txt;
}
const tok_seq::tok *tok_seq::read_token(void)
{
const tok *tf;
const char *tp;
tok *t;
struct list_info *nl;
while(outqueue.is_empty())
{
if (recycled!=NULL)
{
if (recycled->is_empty())
{
delete(recycled);
recycled=NULL;
continue; // outqueue still empty
}
tf=recycled->dequeue();
}
else
tf=this->eqn_rd_token();
if (tf==NULL)
{
if (!done_end)
{
tok *t;
t=new(tok)(T_DOC, "End of word2x output", tok::TOK_END);
output.enqueue(t);
done_end=1;
continue;
}
else
return NULL;
}
if (tf->tok==T_DOC && tf->end==tok::TOK_END)
{
/* End all lists */
while (lp!=NULL)
{
tp=l_type_name(lp);
nl=lp->next_list;
if (!(lp->last_item->is_empty()))
outqueue.transfer(lp->last_item);
delete(lp->last_item);
free(lp);
t=new(tok)(T_ITEM, (void *) NULL, tok::TOK_END);
outqueue.enqueue(t);
t=new(tok)(T_LIST, tp, tok::TOK_END);
outqueue.enqueue(t);
lp=nl;
}
outqueue.enqueue(tf);
}
else if (tf->tok==T_PARAGRAPH && tf->end==tok::TOK_START)
{
tp=list_check(tf->data.d, &lp);
if (tp!=NULL)
{
t=new(tok)(T_PARAGRAPH, tp, tok::TOK_START);
outqueue.enqueue(t);
/* End paragraph will come from previous stage */
}
delete(tf);
}
else
{
if (lp==NULL)
outqueue.enqueue(tf);
else
lp->last_item->enqueue(tf);
}
}
tf=outqueue.dequeue();
return tf;
}
ostream &operator<<(ostream &os, const tok_seq::tok *d)
{
os<<'('<<d->tok<<',';
switch(d->dtype)
{
case 1:
if (d->data.d!=NULL && strlen(d->data.d)>10)
{
char foo[11];
int i;
for(i=0; i<7; i++)
foo[i]=d->data.d[i];
for ( ; i<10; i++)
foo[i]='.';
foo[10]='\0';
os<<foo;
}
else
os<<d->data.d;
break;
case 0:
os<<d->data.table.rows<<'x'<<d->data.table.cols;
break;
}
os<<','<<((d->end==tok_seq::tok::TOK_START) ? "start" : "end")<<')';
return os;
}
tok_seq::tok &tok_seq::tok::operator=(const tok_seq::tok &d)
{
tok=d.tok;
end=d.end;
dtype=d.dtype;
if (d.dtype==TEXT && d.data.d!=NULL)
{
data.d=strdup(d.data.d);
}
return (*this);
}