home *** CD-ROM | disk | FTP | other *** search
- #include "datapriv.hpp"
-
- /*
- This file contains the routines concerned with the generation and use
- of index files
- */
-
- /***************************************************************************
-
- Verify Index
-
- The following routine verifys an index by opening a record on that index
- and checking that all records are present and in order
-
-
- ***************************************************************************/
-
- long clusnrec; // Total number of records found in clusters
-
- int database::verifyindex(char *ind,int depth,void (*progress)(int,long),int order)
- {
- if (!getindkey(ind)) return INVIND; // Must be valid attached index
-
- record rec(*this,ind);
-
- char res[128],resn[128];
- int rtype;
- char iexp[128];
- strcpy(iexp,getindkey(ind));
- rec.select(FIRST,ALL);
-
- if (!depth) return 0;
-
- // Write all changed clusters to the index file
-
- indclus *ip=rec.curind->icp->oi->topind;
- while(ip)
- {
- if (ip->change && ip->valid)
- {
- ip->change=0;
- Fseek(ip->fp,ip->clusn*512L,SEEK_SET); Fwrite(ip->clusdat,1,512,ip->fp);
- }
- ip=ip->next;
- }
-
- if (progress) (*progress)(1,0);
- clusnrec=0;
- if (!rec.curind->icp->oi->topind->verclus(progress,order)) return ERRINTREE;
- if (clusnrec!=getnrec()) return INCOMP;
- if (depth<2) return 0;
-
- rec.eval(iexp,res,rtype);
- for(long i=1; i<getnrec(); i++) // Depth=2, check out record order
- {
- if (progress)
- {
- if (i==1) (*progress)(2,i);
- if (!(i%order)) (*progress)(2,i);
- }
- if (rec.select(NEXT,ALL)) return INCOMP;
-
- rec.eval(resn,rtype);
- double comp;
- switch(rtype)
- {
- case OPINT : comp=*(double *)resn-*(double *)res; break;
- case OPDATE: comp=atol(resn)-atol(res);
- case OPSTR : comp=strcmp(resn,res);
- }
- if (comp<0) return OORD;
- memcpy(res,resn,100);
- }
- if (depth<3) return 0;
-
- // Depth 3, check all records present in index
- record rp(*this); rp.select(FIRST);
- record frp(*this,ind);
- for(i=1; i<=getnrec(); i++)
- {
- if (progress)
- {
- if (i==1) (*progress)(3,i);
- if (!(i%order)) (*progress)(3,i);
- }
- rp.eval(getindkey(ind),res,rtype);
- if (frp.selkey(res,READIND,rtype)) return RECNOTFND;
- while(rp.getrecnum()!=frp.getrecnum()) frp.selkey();
- rp.select(NEXT,ALL);
- }
- return 0;
- }
-
- // Verify a cluster tree, return last key if OK, return 0 on error
- // If record cluster then add number of records to clusnrec
-
- char *indclus::verclus(void (*progress)(int,long),int order)
- {
- long nrec; // Points to each record in index
- char *lrec,*nkey;
- indclus *ni;
- int i,created;
-
- if (clustyp) // Block cluster
- {
- for(i=0; i<nclusrec; i++) // Check each cluster in the block
- {
- int err=0;
- nrec=*(long *)(clusdat+4+i*reclen); // Next block down
- nkey=clusdat+12+i*reclen; // Last key of next block down
- ni=new indclus(nrec,0,oi); // Next index down
- lrec=ni->verclus(progress,order);
- if (!lrec) err=1; // Error in next block down
- else
- switch(oi->restype) // Check last key in child matches this
- {
- case OPSTR : if (strncmp(nkey,lrec,explen)) err=1; break;
- case OPINT : if (*(double *)nkey-*(double *)lrec) err=1; break;
- case OPDATE : if (strncmp(nkey,lrec,8)) err=1; break;
- }
- delete ni;
- if (err) return 0;
- }
- }
- else // Record cluster
- {
- long oclus=clusnrec;
- clusnrec+=nclusrec;
- if (progress && (oclus/order!=clusnrec/order)) (*progress)(1,clusnrec);
- }
- char *rv=(clusdat+12+reclen*(nclusrec-1));
- return rv;
- }
-
-
- /***************************************************************************
-
- BUILD INDEX
-
- The following routines and structures are concerned with building an
- index from scratch. A flat file of unsorted record clusters is built,
- these are then sorted, and then the tree is built at the end of the file
- finally the header record is written
-
- ***************************************************************************/
-
- // The following holds a disk buffer, there are DBUF of these.
-
- #define DBUF 5 // Number of disk clusters buffered in indexes
-
- struct _dbuf
- {
- char buf[512]; // Holds the data from disk
- char uflag; // Counter increments once for every use
- long dpos; // Position in disk file
- int cval; // contents have changed if cval==1
- };
-
- _dbuf *_dbp; // Points to disk buffer array
-
-
- /************** Build Index ... This routine builds an index ************/
-
- // Returns 0 on success, error otherwise
-
- int database::buildindex(char *expr,char *fname,void (*progress)(int,long),int order)
- {
- record rec(*this,NOIND); // record
- int indlen,indtype; // Index length and type
- int tindlen; // total index length with pad
- char ws[128],*wsp; // Work space, generic pointer
- FILE *ifp; // Index file pointer
- int ipad; // No. of characters to pad out index result
- int nclusrec; // No. of records/cluster
- char buf[512]; // Useful buffer
- long i,j,k;
- int dum;
- long topclus=1; // Cluster at top of tree
-
- int rv=0;
- if (nrec) rv=rec.select(FIRST,ALL);
- if (!(indlen=rec.indchk(expr,indtype)) || rv) return EXPRERR;
- ipad=(4-indlen%4)%4;
- tindlen=indlen+ipad;
- nclusrec=504/(indlen+ipad+8);
-
- strcpy(ws,fname);
- if (wsp=strchr(ws,'.')) *wsp=0;
- strcat(ws,".NDX");
- if (!(ifp=fopen(ws,"w+b"))) return NOINDEX;
-
- Fwrite(buf,512,1,ifp); // Write a blank header (fill it in later)
- rec.eval(expr,ws,dum); // Tokenise expression
-
- if (nrec)
- {
- long nwrit=0;
- while(!rv) // Write out all records & index expressions
- {
- long nwr=0; // No. of records written
- int dum;
- char *ip=buf+4; // Pointer to next free space in buffer
-
- for(i=0; i<nclusrec; i++)
- {
- if (rv) break;
- if (!rec.eval(ws,dum)) {fclose(ifp); return EXPRERR;}
- *(long *)ip=0; ip+=4; *(long *)ip=rec.getrecnum(); ip+=4; // rec. no.
- switch(indtype) // Result
- {
- case OPINT : *(double *)ip=*(double *)ws; ip+=sizeof(double); break;
- case OPSTR : wsp=ws; while(*wsp++); wsp--; k=wsp-ws;
- for(j=0; j<tindlen-k; j++) *wsp++=' ';
- strncpy(ip,ws,tindlen); ip+=tindlen;
- break;
- case OPDATE : strncpy(ip,ws,8); ip+=tindlen; break;
- }
- nwr++; nwrit++;
- if (progress && (nwrit==1 || !(nwrit%order))) (*progress)(1,nwrit);
- rv=rec.select(NEXT,ALL);
- }
- *(double *)ip=0;
- *(long *)buf=(long)nwr;
- Fwrite(buf,512,1,ifp);
- }
-
- _dbp=new _dbuf[DBUF];
- indsort(ifp,indtype,tindlen+8,indlen,nrec,nclusrec,progress,order);
- delete _dbp;
- if (progress) (*progress)(3,1);
- buildtree(ifp,tindlen+8,indlen,nclusrec,nrec/nclusrec+1,topclus);
- }
-
- Fseek(ifp,0L,SEEK_SET); // Now write header
- memset(buf,0,512);
- *(long *)buf=topclus; *(long *)(buf+4)=topclus+1;
- switch(indtype)
- {
- case OPINT : buf[9]='N'; buf[16]=1; break;
- case OPDATE: buf[9]='D'; buf[16]=0; break;
- case OPSTR : buf[9]='C'; break;
- }
- buf[12]=indlen;
- buf[14]=nclusrec;
- buf[18]=indlen+ipad+8;
- strcpy(buf+24,expr);
- Fwrite(buf,512,1,ifp);
- if (!nrec) {memset(buf,0,512); Fwrite(buf,512,1,ifp);}
- fclose(ifp);
- return 0;
- }
-
- /********** Index sort ******************************************************/
-
- // These functions implement a full index sort on the unsorted index
- // file.
-
- // The following structure is a buffer which holds a cluster from
- // the index file, whenever 2 elements are compared or sorted, the
- // buffers are checked first to see if the elements are contained therein
-
- struct _ibf
- {
- char *buf; // Points to character data
- _dbuf *dbuf; // Points to disk buffer associated with index
- long low; // Lowest element number in buffer
- long up; // Highest element number in buffer
- long n; // Element number in buffer to examine
- char *an; // Address of element number in buffer to examine
- char *tn; // Points to element data
- long dpos; // Position in disk file
- FILE *fp; // File pointer to index file
- int nclusrec; // No. of records/cluster
- int indlen; // index length
- int indexplen; // index expression length
- int indtype; // Index type
-
-
- _ibf(FILE *,int,int,int,int);
- // Constructor initialises buffer
- ~_ibf(); // Destructor flushes buffer to disk
- void setn(long n); // Set element no. of buffer to n
- // This will check if n is in the buffer
- // and if it is not will save the buffer
- // and update the buffer with new value
-
- void indswap(struct _ibf &s); // Swap element with s
- int indcomp(struct _ibf &s); // Compare element with s
-
- };
-
- _ibf::_ibf(FILE *ifp,int inclusrec,int iindlen,int iindexplen,
- int iindtype)
- {
- fp=ifp;
- nclusrec=inclusrec;
- indlen=iindlen;
- indexplen=iindexplen;
- indtype=iindtype;
- up=-1; // Flag dbuf is invalid
- dbuf=0;
- }
-
- _ibf::~_ibf()
- {
- dbuf->uflag--;
- if (!(dbuf->uflag) && dbuf->cval)
- {
- Fseek(fp,512L*dpos,SEEK_SET); Fwrite(buf,512,1,fp);
- }
- }
-
- // Set element number in buffer
- // If we are simply changing to another element in the same buffer then
- // just move the pointers. If we are moving buffers then check if the
- // current buffer should be written back to disk, see if anyone else
- // is using the new buffer & if so grab it, if not then set up a new
- // buffer holding the cluster we require from disk.
-
- void _ibf::setn(long nw)
- {
- n=nw;
- if (nw<low || nw>up) // Update buffer ?
- {
- if (dbuf) // Only if dbuf is valid
- {
- dbuf->uflag--;
- if (dbuf->cval && !dbuf->uflag)
- {
- Fseek(fp,512L*dpos,SEEK_SET); Fwrite(buf,512,1,fp);
- }
- }
-
- dpos=n/nclusrec+1;
-
- int fflag=0; // Flags buffer already exists
- _dbuf *nfree; // Points to a free buffer
-
- for(int i=0; i<DBUF; i++) // Now do we already have this buffer ?
- {
- if (_dbp[i].dpos==dpos) {fflag=1; break;}
- if (!(_dbp[i].uflag)) nfree=&(_dbp[i]);
- }
- if (fflag) // Someone else has grabbed this buffer for us
- {
- dbuf=&(_dbp[i]); buf=dbuf->buf; dbuf->uflag++;
- }
- else // We'll grab a new buffer
- {
- nfree->uflag=1; nfree->dpos=dpos; nfree->cval=0; buf=nfree->buf;
- Fseek(fp,512L*dpos,SEEK_SET); Fread(buf,512,1,fp);
- dbuf=nfree;
- }
- low=(dpos-1)*nclusrec;
- up=dpos*nclusrec-1;
- }
- an=buf+4+(n-low)*indlen; tn=an+8;
- }
-
- // This function swaps the current element with the current element of s
-
- void _ibf::indswap(_ibf &s)
- {
- char temp[110];
-
- memcpy(temp,an,indlen);
- memcpy(an,s.an,indlen);
- memcpy(s.an,temp,indlen);
- dbuf->cval=1; s.dbuf->cval=1; // Show contents have changed
- }
-
- // This function compares the current element with the current element of s
-
- int _ibf::indcomp(_ibf &s)
- {
- // if (indtype==OPSTR) return(strncmp(tn,s.tn,indexplen));
- if (indtype!=OPINT) return(strncmp(tn,s.tn,indexplen));
-
- double dr=(*(double *)(tn))-(*(double *)(s.tn));
-
- if (dr>0) return 1;
- if (dr<0) return -1;
- return 0;
- }
-
- // The following function takes a pointer to an index file, and
- // sorts the elements in it, it is a quicksort derived from the Zortech
- // qsort function itself derived from the Ray Gardner public domain function
-
- // indtype is the type of result, indlen the length of index+pad+pointers,
- // indexplen is the length of index expression alone, nel is the no. of
- // elements, and nclusrec the number of records/cluster
-
- void database::indsort(FILE *ifp,int indtype,int indlen,int indexplen,
- long nel,int nclusrec,void (*progress)(int,long),int order)
- {
- long nswap=0;
- long lastorder=-1;
- long stack[40],*sp; // stack and stack pointer
- _ibf i(ifp,nclusrec,indlen,indexplen,indtype); // scan and limit pointers
- _ibf j(ifp,nclusrec,indlen,indexplen,indtype);
- _ibf limit(ifp,nclusrec,indlen,indexplen,indtype);
- _ibf base(ifp,nclusrec,indlen,indexplen,indtype);
- _ibf temp(ifp,nclusrec,indlen,indexplen,indtype);
- for(int x=0; x<DBUF; x++) {_dbp[x].dpos=-1; _dbp[x].uflag=0;} // Init buffers
-
- sp=stack; // Init stack pointer
- base.setn(0);
- limit.setn(nel); /* pointer past end of array */
- while (1) /* repeat until done then return */
- {
- if (nswap>lastorder && progress)
- {(*progress)(2,nswap); lastorder=nswap+order;}
-
- while (limit.n-base.n>5) // Max span=5, otherwise insertion sort
- {
- temp.setn(((unsigned long)(limit.n-base.n)>>1)+base.n); // swap middle, base
- temp.indswap(base); nswap++;
-
- i.setn(base.n+1); /* i scans from left to right */
- j.setn(limit.n-1); /* j scans from right to left */
-
- if (i.indcomp(j)>0) {i.indswap(j); nswap++;} // Sedgewick's
- // three-element sort
- if (base.indcomp(j)>0) {base.indswap(j); nswap++;} // sets things up
- // so that
- if (i.indcomp(base)>0) {i.indswap(base); nswap++;} // i <= base <= j
- // base is the pivot element
-
- while (1)
- {
- do i.setn(i.n+1); /* move i right until i >= pivot */
- while (i.indcomp(base)<0);
-
- do j.setn(j.n-1); /* move j left until j <= pivot */
- while (j.indcomp(base)>0);
-
- if (i.n>j.n) break; /* break loop if pointers crossed */
-
- i.indswap(j); /* else swap elements, keep scanning */
- nswap++;
- }
-
- nswap++;
- base.indswap(j); /* move pivot into correct place */
- if (j.n-base.n>limit.n-i.n) /* if left subfile is larger... */
- {
- sp[0]=base.n; sp[1]=j.n; /* stack left subfile base */
- base.setn(i.n); /* and limit */
- } /* sort the right subfile */
-
- else /* else right subfile is larger */
- {
- sp[0]=i.n; sp[1]=limit.n; /* stack right subfile base */
- limit.setn(j.n); /* and limit */
- } /* sort the left subfile */
-
- sp+=2; /* increment stack pointer */
- }
-
- // Insertion sort on remaining subfile
-
- i.setn(base.n+1);
- while (i.n<limit.n)
- {
- j.setn(i.n);
- temp.setn(j.n-1);
- while (j.n>base.n && temp.indcomp(j)>0)
- {
- temp.indswap(j); nswap++;
- j.setn(j.n-1);
- temp.setn(j.n-1);
- }
- i.setn(i.n+1);
- }
-
- if (sp>stack) /* if any entries on stack... */
- {
- sp-=2; /* pop the base and limit */
- base.setn(sp[0]);
- limit.setn(sp[1]);
- }
- else break; /* else stack empty, all done */
- }
- }
-
- // This routine builds an index tree in a sorted flat index file
- // indlen is length of index + pointers
- // nrec is no. of records
- // nclusrec is no. of records/cluster
- // nclus is no. of clusters at the level of tree being indexed
- // indexplen is the index expression length
- // topclus is returned as the top cluster in the tree
-
- void database::buildtree(FILE *fp,int indlen,int indexplen,
- int nclusrec,long nclus,long &topclus)
- {
- long fclus=1; // First cluster of this level of tree
- long flclus; // First cluster of level being written
- char buf[512],ibuf[512]; // Holds current cluster & cluster from disk
- int recclus=1; // Flags we are looking at a record cluster
-
- if (nclus==1) flclus=1;
- while(nclus>1)
- {
- long tlclus=0; // Totals clusters written at this level
- long nclev=nclus/nclusrec; // No. of clusters to write at this level
-
- Fseek(fp,0L,SEEK_END);
- flclus=ftell(fp)/512L; // First cluster at this level
-
- for(long i=0; i<=nclev; i++) // For each cluster at this level
- {
- Fseek(fp,fclus*512L,SEEK_SET); // Find first cluster to be examined
- char *cop=buf+4; // Location to copy record to
- int nrecclus=0; // No. of records in cluster
-
- for(int j=0; j<nclusrec; j++)
- {
- char *lr; // points to last record in cluster
-
- if (!(nclus--)) break; // Last cluster yet ?
- Fread(ibuf,512,1,fp); // Cluster to be examined
- lr=ibuf+4+(*(long *)ibuf-recclus)*indlen; // Last record in cluster
- *(long *)cop=fclus++; cop+=sizeof(long); // Record number
- *(long *)cop=0; cop+=sizeof(long);
- memcpy(cop,lr+8,indexplen); // Copy in last record
- cop+=indlen-8;
- nrecclus++;
- }
- tlclus++;
- *(long *)buf=nrecclus-1; // No. of records in cluster
- Fseek(fp,0L,SEEK_END); Fwrite(buf,512,1,fp); // move to file end & write
- }
- fclus=flclus; nclus=tlclus; // Now index level just done
- if (recclus) recclus=0; // Now looking at blocks
- }
- topclus=flclus; // Top cluster in tree
- }
-