home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
OS/2 Shareware BBS: 5 Edit
/
05-Edit.zip
/
makedoc.zip
/
makedoc7.cpp
next >
Wrap
C/C++ Source or Header
|
1998-08-07
|
17KB
|
750 lines
/**********************************************************************
* Rick,
*
* I made some more changes to fix another portability problem. It seems
* that SOME compilers will pad a structure to a DWORD boundary when you
* use the sizeof operator. In particular, for the Solaris compiler, the
* 78 byte tDocHeader structure is reported as having 80 bytes. This shifts
* EVERYTHING by two bytes and wreaks havoc in the generated .prc file.
* I fixed this (look at the comments in struct tDocHeader and the DOCHEADSZ
* definition) in the two places it occurred.
*
* I also fixed a spelling error in an error message.
*
* I also changed the usage message to say this is version 0.7a (rather than
* 0.6).
*
* I also changed the return type of main() to be int and added various
* calls to exit() as needed. Needed for portability and correctness.
*
* -- Harold Bamford
**********************************************************************/
// MakeDoc
// version 0.7a
//
// Compresses text files into a format that is ready to export to a Pilot
// and work with Rick Bram's PilotDOC reader.
//
// Freeware
//
// ver 0.6 enforce 31 char limit on database names
// ver 0.7 change header and record0 to structs
// ver 0.7a minor misspellings and portability issues
// Builds where the compiler predefines `sparc` are treated as UNIX builds:
// define UNIX (unless the build already did) so that the "#if UNIX" sections
// further down, which print a trailing newline, are compiled in.
#ifdef sparc
# ifndef UNIX
# define UNIX 1
# endif
#endif
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
//template<class A> A max(const A& a, const A& b) {return (a<b) ? b : a;}
#define max(a,b) ((a>b) ? a : b)
// Integer aliases used for buffers and for the on-disk structures.
// The structures below are read and written as raw bytes, so DWORD has to be
// exactly 4 bytes and WORD exactly 2.  `unsigned long` is 8 bytes on LP64
// systems, which shifts every header field, so the exact-width <stdint.h>
// types are used instead.
typedef unsigned char byte;
typedef uint32_t DWORD;
typedef uint16_t WORD;
// Bit layout of a back-reference code in the compressed stream: DISP_BITS of
// distance followed by COUNT_BITS of run length (14 bits in total).
#define DISP_BITS 11
#define COUNT_BITS 3
// Header found at the very start of the .prc file, as this program reads and
// writes it.
// all numbers in these structs are big-endian, MAC format, so multi-byte
// values are passed through SwapWord()/SwapLong() on their way in and out.
// The number in each trailing comment is the running byte count at the END
// of that field, assuming a 4-byte DWORD and a 2-byte WORD.
struct tDocHeader {
char sName[32]; // 32 bytes; main() stores at most 31 name chars plus a NUL
DWORD dwUnknown1; // 36
DWORD dwTime1; // 40
DWORD dwTime2; // 44
DWORD dwTime3; // 48
DWORD dwLastSync; // 52
DWORD ofsSort; // 56
DWORD ofsCatagories; // 60
DWORD dwCreator; // 64; main() stores "TEXt" here and Decomp() checks for it
DWORD dwType; // 68; main() stores "REAd" here and Decomp() checks for it
DWORD dwUnknown2; // 72
DWORD dwUnknown3; // 76
WORD wNumRecs; // 78; record count, one more than the number of text records
};
// Some compilers pad structures out to DWORD boundaries so using sizeof()
// doesn't give the intended result.
// DOCHEADSZ is the number of header bytes actually transferred to or from
// the file; file I/O on a tDocHeader uses it in place of sizeof(tDocHeader).
#define DOCHEADSZ 78
// Contents of the first record in the .prc file (the one the first index
// entry points at).  It describes the text records that follow it.
// Multi-byte fields are stored big-endian, like those of tDocHeader.
struct tDocRecord0 {
WORD wVersion; // 1=plain text, 2=compressed
WORD wSpare; // written as 0 by main()
DWORD dwStoryLen; // in bytes, when decompressed
WORD wNumRecs; // text records only; equals tDocHeader.wNumRecs-1
WORD wRecSize; // usually 0x1000
DWORD dwSpare2; // written as 0 by main()
};
////////////// utilities //////////////////////////////////////
// SwapWord21()
//
// action:  exchanges the two bytes of a 16-bit value.
// returns: r with its low and high bytes swapped
WORD SwapWord21(WORD r)
{
    const unsigned low  = r & 0xFF;
    const unsigned high = r >> 8;
    return (WORD)((low << 8) | high);
}
// SwapWord12()
//
// action:  the "no swap needed" counterpart of SwapWord21(); it has the same
//          signature so either one can be installed in the SwapWord pointer.
// returns: r unchanged
WORD SwapWord12(WORD r)
{
    return r;
}
// SwapLong4321()
//
// action:  reverses the order of the four low-order bytes of r.
// returns: the byte-reversed 32-bit value
//
// Every term is masked to a single byte position, so the result never has
// bits set above bit 31 even when DWORD is wider than 32 bits (an unmasked
// `r<<24` would leak the upper input bytes into the result in that case).
DWORD SwapLong4321(DWORD r)
{
    return ((r >> 24) & 0xFF)          // byte 3 -> byte 0
         | ((r >> 8)  & 0xFF00)        // byte 2 -> byte 1
         | ((r << 8)  & 0xFF0000)      // byte 1 -> byte 2
         | ((r & 0xFF) << 24);         // byte 0 -> byte 3
}
// SwapLong1234()
//
// action:  the "no swap needed" counterpart of SwapLong4321(); it has the
//          same signature so either one can be installed in the SwapLong
//          pointer.
// returns: r unchanged
DWORD SwapLong1234(DWORD r)
{
    return r;
}
// Byte-order converters currently in effect.  Both start out NULL and are
// assigned by SwapChoose(), so SwapChoose() has to succeed before anything
// calls through them.
WORD (*SwapWord)(WORD r) = NULL;
DWORD (*SwapLong)(DWORD r) = NULL;
// copy bytes into a word and double word and see how they fall,
// then choose the appropriate swappers to make things come out
// in the right order.
int SwapChoose()
{
union { char b[2]; WORD w; } w;
union { char b[4]; DWORD d; } d;
strncpy(w.b, "\1\2", 2);
strncpy(d.b, "\1\2\3\4", 4);
if (w.w == 0x0201)
SwapWord = SwapWord21;
else if (w.w == 0x0102)
SwapWord = SwapWord12;
else
return 0;
if (d.d == 0x04030201)
SwapLong = SwapLong4321;
else if (d.d == 0x01020304)
SwapLong = SwapLong1234;
else
return 0;
return 1;
}
// memfind()
//
// replacement for strstr() which deals with 0's in the data
//
// params:  t, t_len   the bytes to search and how many of them there are
//          m, m_len   the byte pattern to look for and its length
// returns: pointer to the first position in t at which the m_len bytes of m
//          occur, or 0 when there is no such position
byte* memfind(byte* t, int t_len, byte* m, int m_len)
{
    // highest offset at which the pattern still fits inside t
    const int last_start = t_len - m_len;

    for (int ofs = 0; ofs <= last_start; ++ofs)
    {
        byte* candidate = t + ofs;

        // cheap first-byte test before the full comparison
        if (candidate[0] != m[0])
            continue;
        if (memcmp(candidate, m, m_len) == 0)
            return candidate;
    }
    return 0;
}
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
///////////////////// //////////////////////
///////////////////// tBuf class //////////////////////
///////////////////// //////////////////////
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
// tBuf
//
// Owns one heap-allocated byte buffer.  The transform methods (RemoveBinary,
// Compress, Decompress, DuplicateCR) each build their result in a newly
// allocated buffer, free the old one and update buf/len.
struct tBuf {
    byte* buf;      // owned storage; reallocated by Clear() and the transforms
    unsigned len;   // byte count callers treat as the buffer's contents;
                    // 6000 (the allocation size) after construction/Clear()

    // allocate the initial 6000-byte buffer
    tBuf() { buf = new byte[len = 6000]; }
    // delete[] on a null pointer is a no-op, so no test is needed
    ~tBuf() { delete[] buf; }

    unsigned Len() const { return len; }
    unsigned RemoveBinary();
    unsigned Decompress();
    unsigned Compress();
    unsigned Issue(byte src, int& bSpace);
    unsigned DuplicateCR();

    // throw the current buffer away and start again with a fresh 6000-byte one
    void Clear() { delete[] buf; buf = new byte[len = 6000]; }
    // debugging aid: print the current length (len is unsigned, hence %u)
    void Dump() { printf("\nbuffer len=%u", len); }

private:
    // Copying would leave two objects deleting the same storage, so the copy
    // operations are declared private and intentionally left undefined.
    tBuf(const tBuf&);
    tBuf& operator=(const tBuf&);
};
//
// Issue()
//
// action:  appends the encoding of one source character to buf at
//          position len and advances len.
//
// params:  src     the character to encode
//          bSpace  in/out flag: non-zero while a space has been seen but
//                  not yet written.  A pending space is merged with a
//                  following 0x40...0x7F character into a single byte
//                  (src ^ 0x80); otherwise it is written out on its own
//                  ahead of the character.
//
// returns: the new value of len
//
// note:    no bounds checking is done; the caller must provide room.
//
unsigned
tBuf::Issue(byte src, int& bSpace)
{
    unsigned out = len;

    // bytes 0 and 9...0x7F are stored as themselves; everything else
    // (1...8 and 0x80...0xFF) is stored as the marker byte 1 plus the byte
    const bool plain = (src < 0x80) && (src == 0 || src > 8);

    if (!bSpace)
    {
        if (src == ' ')
        {
            // hold the space back; it may combine with the next character
            bSpace = 1;
        }
        else
        {
            if (!plain)
                buf[out++] = 1;
            buf[out++] = src;
        }
    }
    else
    {
        if (src >= 0x40 && src <= 0x7F)
        {
            // space + ASCII char squeezed into one byte
            buf[out++] = (byte)(src ^ 0x80);
        }
        else
        {
            // no squeeze possible: the space goes out by itself first
            buf[out++] = ' ';
            if (!plain)
                buf[out++] = 1;
            buf[out++] = src;
        }
        // the pending space has been dealt with either way
        bSpace = 0;
    }

    len = out;
    return out;
}
//
// Compress
//
// params:  none
//
// returns: the number of bytes in the compressed buffer (also left in len)
//
// action:  takes the given buffer, and compresses the original data down
//          into a second buffer, which then replaces buf.  The output is
//          made of these codes:
//            0, 9...0x7F     the byte itself
//            1...8           that many raw bytes follow
//            0x80...0xBF     first byte of a two-byte run code holding
//                            DISP_BITS of distance back and COUNT_BITS of
//                            (run length - 3)
//            0xC0...0xFF     a space followed by (code ^ 0x80)
//
// comment: This version make heavy use of walking pointers.
//
unsigned tBuf::Compress()
{
    int bSpace = 0;     // non-zero while a space is waiting to be written

    // run through the input buffer
    byte* pBuffer;      // points to the input buffer
    byte* pHit;         // points to a walking test hit; works upwards on successive matches
    byte* pPrevHit;     // previous value of pHit
    byte* pTestHead;    // current test string
    byte* pTestTail;    // current walking pointer; one past the current test buffer
    byte* pEnd;         // 1 past the end of the input buffer

    pHit = pPrevHit = pTestHead = pBuffer = buf;
    pTestTail = pTestHead+1;
    pEnd = buf + len;

    // make a dest buffer and reassign the local buffer.
    // Before the final merge pass a byte that needs escaping occupies two
    // output bytes (marker 1 + byte), so the intermediate output can be up to
    // twice the input size; a fixed-size buffer is not enough for 4096 such
    // bytes.  The extra 8 bytes are slack.
    buf = new byte[2*len + 8];
    len = 0;            // used to walk through the output buffer

    // loop, absorbing one more char from the input buffer on each pass
    for (; pTestHead != pEnd; pTestTail++)
    {
        // establish where the scan can begin
        if (pTestHead - pPrevHit > ((1<<DISP_BITS)-1))
            pPrevHit = pTestHead - ((1<<DISP_BITS)-1);

        // scan in the previous data for a match
        pHit = memfind(pPrevHit, pTestTail - pPrevHit, pTestHead, pTestTail - pTestHead);

        // the test string itself lies inside the scanned range, so a miss
        // here indicates an internal error
        if (pHit==0)
            printf("!! bug source %x%x%x, dest %x%x%x, %d bytes", pPrevHit[0],
                pPrevHit[1],pPrevHit[2],pTestHead[0],
                pTestHead[1], pTestHead[2], (int)(pTestTail-pTestHead));

        // on a mismatch or end of buffer, issued codes
        if (pHit==0
            || pHit==pTestHead
            || pTestTail-pTestHead>(1<<COUNT_BITS)+2
            || pTestTail==pEnd)
        {
            // issued the codes
            // first, check for short runs
            if (pTestTail-pTestHead < 4)
            {
                Issue(pTestHead[0], bSpace);
                pTestHead++;
            }
            // for longer runs, issue a run-code
            else
            {
                // issue space char if required
                if (bSpace)
                {
                    buf[len++] = ' ';
                    bSpace = 0;
                }

                // the last absorbed char is not part of the run, so the run
                // is one shorter than the test string; the stored count is
                // (run length - 3), i.e. (test string length - 4)
                const unsigned int dist = (unsigned int)(pTestHead - pPrevHit);
                const unsigned int count = (unsigned int)(pTestTail - pTestHead) - 4;
                const unsigned int compound = (dist << COUNT_BITS) + count;

                if (dist>=(1<<DISP_BITS)) printf("\n!! error dist overflow");
                if (count>7) printf("\n!! error count overflow");

                buf[len++] = 0x80 + (compound>>8);
                buf[len++] = compound & 0xFF;

                // and start again
                pTestHead = pTestTail-1;
            }
            // start the search again
            pPrevHit = pBuffer;
        }
        // got a match
        else
        {
            pPrevHit = pHit;
        }

        // when we get to the end of the buffer, don't inc past the end
        // this forces the residue chars out one at a time
        if (pTestTail==pEnd) pTestTail--;
    }

    // clean up any dangling spaces
    if (bSpace) buf[len++] = ' ';

    // final scan to merge consecutive high chars together:
    // "1 a 1 b 1 c" becomes "3 a b c" (at most 8 bytes per marker).
    // i reads, k writes; k never overtakes i, so this is done in place.
    unsigned i, k;
    for (i=k=0; i<len; i++,k++)
    {
        buf[k] = buf[i];
        // skip the run-length codes
        if (buf[k]>=0x80 && buf[k]<0xC0)
            buf[++k] = buf[++i];
        // if we hit a high char marker, look ahead for another
        else if (buf[k]==1)
        {
            buf[k+1] = buf[i+1];
            while (i+2<len && buf[i+2]==1 && buf[k]<8)
            {
                buf[k]++;
                buf[k+buf[k]] = buf[i+3];
                i+=2;
            }
            k += buf[k]; i++;
        }
    }

    // delete original buffer
    delete[] pBuffer;
    len = k;
    return k;
}
/*
Decompress
params: none
returns: the number of decompressed bytes (also left in len)
action: make a new buffer
run through the source data
check the 4 cases:
0,9...7F represent self
1...8 escape n chars
80...bf reference earlier run
c0...ff space+ASCII
The new buffer replaces buf.

The input comes from a file, so every read and write is checked against the
buffer limits.  Decoding stops at the first code that would read past the
input, write past the output buffer, or refer back before the start of the
output; a warning is printed and the bytes decoded so far are kept.
*/
unsigned
tBuf::Decompress()
{
    // output capacity; a well-formed record decompresses to at most 4096
    const unsigned kOutCap = 6000;

    byte* pOut = new byte[kOutCap];
    const byte* in_buf = buf;
    unsigned i = 0;     // next write position in pOut
    unsigned j = 0;     // next read position in in_buf
    int bCorrupt = 0;

    while (j < len)
    {
        // take a char from the input buffer
        unsigned int c = in_buf[j++];

        // separate the char into zones: 0, 1...8, 9...0x7F, 0x80...0xBF, 0xC0...0xFF

        // codes 1...8 mean copy that many bytes; for accented chars & binary
        if (c>0 && c<9)
        {
            if (c > len - j || c > kOutCap - i) { bCorrupt = 1; break; }
            while (c--) pOut[i++] = in_buf[j++];
        }
        // codes 0, 9...0x7F represent themselves
        else if (c<0x80)
        {
            if (i >= kOutCap) { bCorrupt = 1; break; }
            pOut[i++] = (byte)c;
        }
        // codes 0xC0...0xFF represent "space + ascii char"
        else if (c>=0xC0)
        {
            if (kOutCap - i < 2) { bCorrupt = 1; break; }
            pOut[i++] = ' ';
            pOut[i++] = (byte)(c ^ 0x80);
        }
        // codes 0x80...0xBf represent sequences
        else
        {
            // the second byte of the code must be present
            if (j >= len) { bCorrupt = 1; break; }
            c <<= 8;
            c += in_buf[j++];

            const unsigned m = (c & 0x3FFF) >> COUNT_BITS;              // distance back
            unsigned n = (c & ((1<<COUNT_BITS) - 1)) + 3;               // run length

            // the source of the copy must lie inside what has been written
            if (m == 0 || m > i || n > kOutCap - i) { bCorrupt = 1; break; }

            // byte-by-byte on purpose: source and destination may overlap
            while (n--)
            {
                pOut[i] = pOut[i-m];
                i++;
            }
        }
    }

    if (bCorrupt)
        printf("\nWARNING: compressed record is damaged; output truncated");

    delete[] buf;
    buf = pOut;
    len = i;
    return i;
}
unsigned tBuf::DuplicateCR()
{
byte* pBuf = new byte[2*len];
int k,j;
for (j=k=0; j<len; j++, k++)
{
pBuf[k] = buf[j];
if (pBuf[k]==0x0A) pBuf[k++] = 0x0D, pBuf[k] = 0x0A;
}
delete[] buf;
buf = pBuf;
len = k;
return k;
}
void Decomp(char* src, char* dest, int bBinary)
{
FILE* fin;
FILE* fout;
fin = fopen(src,"rb");
if (fin==0)
{
printf("problem opening source file %s", src);
exit(2);
}
// just holds the first few bytes of the file
byte buf[0x100];
tDocHeader head;
fread(&head, 1, DOCHEADSZ, fin);
if (strncmp((char *)&head.dwType, "REAd", 4) != 0
|| strncmp((char *)&head.dwCreator, "TEXt", 4) != 0)
{
//printf("file contains %.4s, %.4s", (char *)&head.dwCreator, (char *)&head.dwType);
printf(".prc file is not the correct format");
exit(3);
}
WORD bCompressed;
DWORD dwPos;
tDocRecord0 rec0;
// point to start of index
fseek(fin, 0x4E, SEEK_SET);
// read the location of the first record
fread(&dwPos, 4, 1, fin);
dwPos = SwapLong(dwPos);
fseek(fin, dwPos, SEEK_SET);
fread(&rec0, sizeof(rec0), 1, fin);
bCompressed = SwapWord(rec0.wVersion);
if (bCompressed!=1 && bCompressed!=2)
printf("\nWARNING: unknown file compression type:%d",bCompressed);
bCompressed--;
fout = fopen(dest,"wb");
if (fout==0)
{
printf("problem opening output file %s",dest);
exit(2);
}
DWORD dwLen;
fseek(fin,0,SEEK_END);
dwLen = ftell(fin);
WORD nRecs;
nRecs = SwapWord(head.wNumRecs) - 1;
// this is the main record buffer
// it knows how to stretch to accomodate the decompress
tBuf t;
DWORD dwRecLen;
for (int i=0; i<nRecs; i++)
{
// read the record offset
fseek(fin, 0x56 + 8*i, SEEK_SET);
fread(&dwPos, 4, 1, fin);
dwPos = SwapLong(dwPos);
// read start of next record
fseek(fin, 0x5E + 8*i, SEEK_SET);
fread(&dwRecLen, 4, 1, fin);
dwRecLen = SwapLong(dwRecLen);
// for the last, use the file len
if (i==nRecs-1) dwRecLen = dwLen;
dwRecLen -= dwPos;
fseek(fin,dwPos,SEEK_SET);
int n = fread(t.buf, 1, dwRecLen, fin);
t.len = n;
if(bCompressed)
t.Decompress();
// check for CR insert
if (!bBinary)
t.DuplicateCR();
printf("\rreconverting %s: record %d of %d",head.sName,i,nRecs);
fwrite(t.buf, 1, t.Len(), fout);
}
fclose(fin);
fclose(fout);
}
// this nasty little beast removes really low ASCII and 0's
// and handles the CR problem
//
// if a cr appears before a lf, then remove the cr
// if a cr appears in isolation, change to a lf
unsigned tBuf::RemoveBinary()
{
byte* in_buf = buf;
byte* out_buf = new byte[len];
int k,j;
for (j=k=0; j<len; j++,k++)
{
// copy each byte
out_buf[k] = in_buf[j];
// throw away really low ASCII
if ((out_buf[k]>=0 && out_buf[k]<9)) k--;
// for CR
if (out_buf[k]==0x0D)
{
// if next is LF, then drop it
if (j<len-1 && in_buf[j+1]==0x0A)
k--;
else // turn it into a LF
out_buf[k] = 0x0A;
}
}
delete[] buf;
buf = out_buf;
len = k;
return k;
}
// out_word()
//
// action:  writes the 16-bit value w to fout as two bytes, most significant
//          byte first (the byte order used throughout the .prc file).
//
// The bytes are produced with shifts, so the output does not depend on the
// host byte order, on sizeof(short), or on SwapChoose() having been called.
void out_word(short w, FILE* fout)
{
    const unsigned v = (unsigned short)w;
    unsigned char bytes[2];
    bytes[0] = (unsigned char)((v >> 8) & 0xFF);
    bytes[1] = (unsigned char)(v & 0xFF);
    fwrite(bytes, 1, 2, fout);
}
// out_long()
//
// action:  writes the low 32 bits of d to fout as four bytes, most
//          significant byte first (the byte order used throughout the
//          .prc file).
//
// The bytes are produced with shifts, so the output does not depend on the
// host byte order or on sizeof(long): writing the first four bytes of an
// 8-byte long would emit the wrong half on a big-endian host.
void out_long(long d, FILE* fout)
{
    const unsigned long v = (unsigned long)d;
    unsigned char bytes[4];
    bytes[0] = (unsigned char)((v >> 24) & 0xFF);
    bytes[1] = (unsigned char)((v >> 16) & 0xFF);
    bytes[2] = (unsigned char)((v >> 8) & 0xFF);
    bytes[3] = (unsigned char)(v & 0xFF);
    fwrite(bytes, 1, 4, fout);
}
int
main(int argc, char** argv)
{
printf("MakeDoc ver 0.7a\n");
if (argc<4)
{
printf("\nsyntax makedoc [-n] [-b] <text-file> <prc-file> <story-name>");
printf("\n convert text files to .PRC format");
printf("\n makedoc -d [-b] <prc-file> <text-file>");
printf("\n decodes the PRC back into the txt file");
printf("\n -n builds the .prc file without compression");
printf("\n -b option compresses/decompresses binary");
#if UNIX
printf("\n");
#endif
exit(1);
}
int iArg = 1;
int bDecomp = 0;
int bBinary = 0;
int bReport = 0;
int bCompress = 1;
if ( ! SwapChoose()) {
printf("\nfailed to select proper byte swapping algorithm");
#if UNIX
printf("\n");
#endif
exit(1);
}
while (argv[iArg][0]=='-' || argv[iArg][0]=='\\')
{
if (argv[iArg][1]=='d')
bDecomp = 1;
if (argv[iArg][1]=='b')
bBinary = 1;
if (argv[iArg][1]=='r')
bReport = 1;
if (argv[iArg][1]=='n')
bCompress = 0;
iArg++;
}
if (bDecomp)
Decomp(argv[iArg], argv[iArg+1], bBinary);
else
{
FILE* fin;
FILE* fout;
tDocHeader head1;
fin = fopen(argv[iArg],"rb");
fout = fopen(argv[iArg+1],"wb");
if (fin==0 || fout==0)
{
printf("problem opening files");
exit(2);
}
fseek(fin,0,SEEK_END);
DWORD storySize = ftell(fin);
fseek(fin,0,SEEK_SET);
DWORD x;
WORD w;
long recSize = 4096;
DWORD z,numRecs;
sprintf(head1.sName,"%.31s",argv[iArg+2]);
head1.sName[31] = 0;
printf("saving to %s as <%s>,%s%s compressed",argv[iArg+1],argv[iArg+2],
bBinary ? " binary mode," : "",
bCompress ? "" : " not");
/*LocalWrite just writes to the new file the number of bytes starting at the passed pointer*/
head1.dwUnknown1 = 0;
strncpy((char *)&head1.dwTime1, "\x06\xD1\x44\xAE", 4);
strncpy((char *)&head1.dwTime2, "\x06\xD1\x44\xAE", 4);
head1.dwTime3 = 0;
head1.dwLastSync = 0;
head1.ofsSort = 0;
head1.ofsCatagories = 0;
strncpy((char *)&head1.dwCreator, "TEXt", 4); // database creator
strncpy((char *)&head1.dwType, "REAd", 4); // database type
head1.dwUnknown2 = 0;
head1.dwUnknown3 = 0;
z = (int) (storySize/(long) recSize);
if (((long) z * recSize) < storySize)
z ++;
numRecs = z;
z ++;
head1.wNumRecs = SwapWord(z); // the number of records to follow
fwrite(&head1,1,DOCHEADSZ,fout);
unsigned long index;
index = 0x406F8000; // the pattern for attributes=dirty + unique_id=0x6f8000
x = 0x50L + (long) z * 8;
out_long(x,fout); // start writing the record offsets
out_long(index,fout);
x += 0x0010L;
index++;
z--;
while(z--) {
out_long(x,fout); //more record offsets
out_long(index++,fout); // the attributes + ID's
x += 0x1000L;
}
// one more word.....
out_word(0,fout);
tDocRecord0 rec0;
rec0.wVersion = SwapWord(bCompress ? 2 : 1);
rec0.wSpare = 0;
rec0.dwStoryLen = SwapLong(storySize);
rec0.wNumRecs = SwapWord(SwapWord(head1.wNumRecs) - 1);
rec0.wRecSize = SwapWord(recSize);
rec0.dwSpare2 = 0;
fwrite(&rec0,1,sizeof(rec0),fout);
int n = recSize;
// dump the whole story into the new file
int recNum = 0;
printf("\n");
tBuf buf;
while(recNum < numRecs)
{
long pos;
pos = ftell(fout);
fseek(fout, 0x56 + 8*recNum, SEEK_SET);
if (recNum!=numRecs) out_long(pos,fout);
fseek(fout, pos, SEEK_SET);
int nOrg;
buf.Clear();
nOrg = n = fread(buf.buf,1,4096,fin);
buf.len = n;
if (n==0) break;
if (!bBinary)
buf.RemoveBinary();
if (bCompress)
buf.Compress();
n = fwrite(buf.buf,1,buf.Len(),fout);
printf("\rconverting record %d of %d",recNum+1,numRecs);
if (bReport && n && bCompress)
printf("\noriginal %d bytes, compressed to %d bytes, ratio: %f5.1\n",
nOrg, n, 100. * n / nOrg);
recNum++;
}
fclose(fin);
fclose(fout);
}
exit(0);
}