home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
OS/2 Shareware BBS: 5 Edit
/
05-Edit.zip
/
anwor032.zip
/
antiword.0.32
/
wordlib.c
< prev
next >
Wrap
C/C++ Source or Header
|
2001-09-12
|
24KB
|
872 lines
/*
* wordlib.c
* Copyright (C) 1998-2001 A.J. van Os; Released under GPL
*
* Description:
* Deal with the internals of a MS Word file
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "antiword.h"
/*
 * Private type for Property Set Storage entries
 *
 * bGetPPS fills one of these for every 128-byte directory record it reads;
 * vComputePPSlevels fills in iLevel afterwards.
 */
typedef struct pps_entry_tag {
/* Entry name, reduced to one byte per character by vName2String */
char szName[32];
/* Type byte from record offset 0x42; bGetPPS treats 5 as the root entry */
/* and only looks at entries of type 2 when searching for streams */
int iType;
/* Index of another entry, read from record offset 0x48; -1 means none */
int iNext;
/* Index of another entry, read from record offset 0x44; -1 means none */
int iPrev;
/* Index of an entry one level deeper, from offset 0x4c; -1 means none */
int iDir;
/* Value read from record offset 0x74; copied to the lSb of the stream */
long lSb;
/* Value read from record offset 0x78; copied to the lSize of the stream */
long lSize;
/* Depth below the root entry; INT_MAX until vComputePPSlevels visits it */
int iLevel;
} pps_entry_type;
/*
 * Macro to make sure all such statements will be identical
 *
 * Destroys the small block list and releases the five block index arrays.
 * The expansion names the variables alRootList, alSbdList, alBbdList,
 * alSBD and alBBD directly, so it can only be used where all five are in
 * scope. Each pointer is overwritten with the result of xfree (presumably
 * NULL, which would make a second FREE_ALL harmless - TODO confirm).
 */
#define FREE_ALL() \
do {\
vDestroySmallBlockList();\
alRootList = xfree(alRootList);\
alSbdList = xfree(alSbdList);\
alBbdList = xfree(alBbdList);\
alSBD = xfree(alSBD);\
alBBD = xfree(alBBD);\
} while(0)
/*
* ulReadLong - read four bytes from the given file and offset
*/
static unsigned long
ulReadLong(FILE *pFile, long lOffset)
{
unsigned char aucBytes[4];
fail(pFile == NULL || lOffset < 0);
if (!bReadBytes(aucBytes, 4, lOffset, pFile)) {
werr(1, "Read long %ld not possible", lOffset);
}
return ulGetLong(0, aucBytes);
} /* end of ulReadLong */
/*
 * vName2String - turn a two-bytes-per-character name into a C string
 *
 * szName	receives the string
 * aucBytes	the raw name; only every second byte (0, 2, 4, ...) is used
 * iNameSize	number of characters, including the terminator
 *
 * The last of the iNameSize characters is always replaced by '\0'. A size
 * of zero or less yields the empty string.
 *
 * No bounds are checked here: the caller must make sure that szName has
 * room for iNameSize characters and that aucBytes holds at least
 * 2 * iNameSize bytes.
 */
static void
vName2String(char *szName, const unsigned char *aucBytes, int iNameSize)
{
    int iChar;

    fail(aucBytes == NULL || szName == NULL);

    if (iNameSize > 0) {
        /* Keep the first byte of every two-byte character */
        for (iChar = 0; iChar < iNameSize; iChar++) {
            szName[iChar] = (char)aucBytes[2 * iChar];
        }
        szName[iNameSize - 1] = '\0';
    } else {
        szName[0] = '\0';
    }
} /* end of vName2String */
/*
 * tReadBlockIndices - read the Big/Small Block Depot indices
 *
 * pFile	the file to read from
 * alBlockDepot	receives the indices
 * tMaxRec	maximum number of indices to store
 * lOffset	file position of the big block that holds the indices
 *
 * One complete big block is read; it holds BIG_BLOCK_SIZE / 4 indices of
 * four bytes each. No more than tMaxRec of them are stored.
 *
 * Returns the number of indices read, zero when the block can't be read
 */
static size_t
tReadBlockIndices(FILE *pFile, long *alBlockDepot, size_t tMaxRec, long lOffset)
{
    unsigned char aucBlock[BIG_BLOCK_SIZE];
    size_t tCount;
    int iEntry;

    fail(pFile == NULL || alBlockDepot == NULL);
    fail(tMaxRec == 0);
    fail(lOffset < 0);

    /* Fetch the big block with the BBD or SBD indices */
    if (!bReadBytes(aucBlock, BIG_BLOCK_SIZE, lOffset, pFile)) {
        werr(0, "Reading big block from %ld is not possible", lOffset);
        return 0;
    }

    /* Never hand out more indices than the caller has room for */
    tCount = (size_t)BIG_BLOCK_SIZE / 4;
    if (tMaxRec < tCount) {
        tCount = tMaxRec;
    }

    /* Decode the indices one by one */
    for (iEntry = 0; iEntry < (int)tCount; iEntry++) {
        alBlockDepot[iEntry] = (long)ulGetLong(4 * iEntry, aucBlock);
        NO_DBG_DEC(alBlockDepot[iEntry]);
    }
    return tCount;
} /* end of tReadBlockIndices */
/*
 * bGetBBD - get the Big Block Depot indices from the index-blocks
 *
 * pFile	the file to read from
 * alDepot	block numbers of the blocks that hold the depot
 * tDepotLen	number of elements in alDepot
 * alBBD	receives the depot indices
 * tBBDLen	number of indices wanted
 *
 * The depot blocks are read in order until tBBDLen indices are stored.
 *
 * Returns TRUE when exactly tBBDLen indices were read, FALSE when a block
 * could not be read or the depot blocks ran out first
 */
static BOOL
bGetBBD(FILE *pFile, const long *alDepot, size_t tDepotLen,
    long *alBBD, size_t tBBDLen)
{
    long *plDest;
    long lBlockStart;
    size_t tLeft, tRead;
    int iDepot;

    fail(pFile == NULL || alDepot == NULL || alBBD == NULL);

    DBG_MSG("bGetBBD");

    plDest = alBBD;
    tLeft = tBBDLen;
    iDepot = 0;
    while (iDepot < (int)tDepotLen && tLeft != 0) {
        /* Block n starts at (n + 1) * BIG_BLOCK_SIZE */
        lBlockStart = (alDepot[iDepot] + 1) * BIG_BLOCK_SIZE;
        NO_DBG_HEX(lBlockStart);
        tRead = tReadBlockIndices(pFile, plDest, tLeft, lBlockStart);
        fail(tRead > tLeft);
        if (tRead == 0) {
            return FALSE;
        }
        plDest += tRead;
        tLeft -= tRead;
        iDepot++;
    }
    return tLeft == 0;
} /* end of bGetBBD */
/*
 * bGetSBD - get the Small Block Depot indices from the index-blocks
 *
 * pFile	the file to read from
 * alDepot	block numbers of the big blocks that hold the depot
 * tDepotLen	number of elements in alDepot
 * alSBD	receives the depot indices
 * tSBDLen	number of indices wanted
 *
 * The depot blocks are read in order until tSBDLen indices are stored.
 *
 * Returns TRUE when exactly tSBDLen indices were read, FALSE when a block
 * could not be read or the depot blocks ran out first
 */
static BOOL
bGetSBD(FILE *pFile, const long *alDepot, size_t tDepotLen,
    long *alSBD, size_t tSBDLen)
{
    long *plNext;
    long lStart;
    size_t tWanted, tGot;
    int iBlock;

    fail(pFile == NULL || alDepot == NULL || alSBD == NULL);

    DBG_MSG("bGetSBD");

    plNext = alSBD;
    tWanted = tSBDLen;
    for (iBlock = 0; tWanted != 0; iBlock++) {
        if (iBlock >= (int)tDepotLen) {
            /* Out of depot blocks */
            break;
        }
        /* Block n starts at (n + 1) * BIG_BLOCK_SIZE */
        lStart = (alDepot[iBlock] + 1) * BIG_BLOCK_SIZE;
        NO_DBG_HEX(lStart);
        tGot = tReadBlockIndices(pFile, plNext, tWanted, lStart);
        fail(tGot > tWanted);
        if (tGot == 0) {
            return FALSE;
        }
        plNext += tGot;
        tWanted -= tGot;
    }
    return tWanted == 0;
} /* end of bGetSBD */
/*
 * vComputePPSlevels - compute the levels of the Property Set Storage entries
 *
 * atPPSlist		the complete list of entries
 * pNode		the entry to start with
 * iLevel		the level to give to pNode
 * iRecursionLevel	the number of calls made to get here
 *
 * The entry reached through iDir gets iLevel + 1, the entries reached
 * through iNext and iPrev get iLevel. An entry is left alone when it
 * already has this level or a lower one, and nothing is done beyond
 * 25 nested calls; together these stop loops in a damaged list.
 */
static void
vComputePPSlevels(pps_entry_type *atPPSlist, pps_entry_type *pNode,
    int iLevel, int iRecursionLevel)
{
    int aiTarget[3], aiTargetLevel[3];
    int iLink;

    fail(atPPSlist == NULL || pNode == NULL);
    fail(iLevel < 0 || iRecursionLevel < 0);

    if (iRecursionLevel > 25) {
        /* Too deep, refuse to recurse any further */
        DBG_DEC(iRecursionLevel);
        return;
    }
    if (pNode->iLevel <= iLevel) {
        /* Seen before on this level or a lower one: a loop */
        DBG_DEC(iLevel);
        DBG_DEC(pNode->iLevel);
        return;
    }

    pNode->iLevel = iLevel;

    /* The links to follow, in the order in which they are followed */
    aiTarget[0] = pNode->iDir;
    aiTargetLevel[0] = iLevel + 1;
    aiTarget[1] = pNode->iNext;
    aiTargetLevel[1] = iLevel;
    aiTarget[2] = pNode->iPrev;
    aiTargetLevel[2] = iLevel;

    for (iLink = 0; iLink < 3; iLink++) {
        if (aiTarget[iLink] == -1) {
            /* No entry on this link */
            continue;
        }
        vComputePPSlevels(atPPSlist,
                &atPPSlist[aiTarget[iLink]],
                aiTargetLevel[iLink],
                iRecursionLevel + 1);
    }
} /* end of vComputePPSlevels */
/*
 * bGetPPS - search the Property Set Storage for the streams of a document
 *
 * pFile	the file to read from
 * alRootList	the big blocks that hold the Property Set Storage
 * tRootListLen	number of elements in alRootList
 * pPPS		receives start block and size of the WordDocument, Data,
 *		0Table and 1Table streams; it is zeroed first, so a stream
 *		that is not found keeps a size of zero
 *
 * All entries are read and checked first. Then the levels are computed,
 * starting from the last entry of type 5 (entry 0 when there is none),
 * and the type 2 entries on level 1 are matched by name.
 *
 * Return TRUE if the WordDocument PPS is found, otherwise FALSE after
 * reporting the reason with werr
 */
static BOOL
bGetPPS(FILE *pFile,
    const long *alRootList, size_t tRootListLen, pps_info_type *pPPS)
{
    pps_entry_type *atPPSlist;
    long lBegin, lTmp;
    size_t tNbrOfPPS;
    int iIndex, iStartBlock, iOffset;
    int iNameSize, iRootIndex;
    BOOL bWord, bExcel;
    unsigned char aucBytes[PROPERTY_SET_STORAGE_SIZE];

    fail(pFile == NULL || pPPS == NULL || alRootList == NULL);

    DBG_MSG("bGetPPS");
    NO_DBG_DEC(tRootListLen);

    bWord = FALSE;
    bExcel = FALSE;
    (void)memset(pPPS, 0, sizeof(*pPPS));

    /* Read and store all the Property Set Storage entries */
    tNbrOfPPS = tRootListLen * BIG_BLOCK_SIZE / PROPERTY_SET_STORAGE_SIZE;
    atPPSlist = xmalloc(tNbrOfPPS * sizeof(pps_entry_type));
    iRootIndex = 0;
    for (iIndex = 0; iIndex < (int)tNbrOfPPS; iIndex++) {
        /* Find the block and the place within that block */
        lTmp = (long)iIndex * PROPERTY_SET_STORAGE_SIZE;
        iStartBlock = (int)(lTmp / BIG_BLOCK_SIZE);
        iOffset = (int)(lTmp % BIG_BLOCK_SIZE);
        lBegin = (alRootList[iStartBlock] + 1) * BIG_BLOCK_SIZE +
                iOffset;
        NO_DBG_HEX(lBegin);
        if (!bReadBytes(aucBytes, PROPERTY_SET_STORAGE_SIZE,
                lBegin, pFile)) {
            werr(0, "Reading PPS %d is not possible", iIndex);
            atPPSlist = xfree(atPPSlist);
            return FALSE;
        }
        /* The name size is in bytes, two bytes per character */
        iNameSize = (int)usGetWord(0x40, aucBytes);
        iNameSize = (iNameSize + 1) / 2;
        if (iNameSize > (int)sizeof(atPPSlist[iIndex].szName)) {
            /*
             * The size comes straight from the file and can be
             * anything up to 65535 bytes. Using it unchecked
             * would write beyond szName and read beyond
             * aucBytes, so cut it down to what szName can hold
             */
            DBG_DEC(iIndex);
            DBG_DEC(iNameSize);
            iNameSize = (int)sizeof(atPPSlist[iIndex].szName);
        }
        vName2String(atPPSlist[iIndex].szName, aucBytes, iNameSize);
        atPPSlist[iIndex].iType = (int)ucGetByte(0x42, aucBytes);
        if (atPPSlist[iIndex].iType == 5) {
            /* The levels are computed starting from this entry */
            iRootIndex = iIndex;
        }
        atPPSlist[iIndex].iPrev = (int)ulGetLong(0x44, aucBytes);
        atPPSlist[iIndex].iNext = (int)ulGetLong(0x48, aucBytes);
        atPPSlist[iIndex].iDir = (int)ulGetLong(0x4c, aucBytes);
        atPPSlist[iIndex].lSb = (long)ulGetLong(0x74, aucBytes);
        atPPSlist[iIndex].lSize = (long)ulGetLong(0x78, aucBytes);
        atPPSlist[iIndex].iLevel = INT_MAX;
        /* The links are used as array indices, so check them now */
        if (atPPSlist[iIndex].iPrev < -1 ||
            atPPSlist[iIndex].iPrev >= (int)tNbrOfPPS ||
            atPPSlist[iIndex].iNext < -1 ||
            atPPSlist[iIndex].iNext >= (int)tNbrOfPPS ||
            atPPSlist[iIndex].iDir < -1 ||
            atPPSlist[iIndex].iDir >= (int)tNbrOfPPS) {
            DBG_DEC(iIndex);
            DBG_DEC(atPPSlist[iIndex].iPrev);
            DBG_DEC(atPPSlist[iIndex].iNext);
            DBG_DEC(atPPSlist[iIndex].iDir);
            DBG_DEC(tNbrOfPPS);
            werr(0, "The Property Set Storage is damaged");
            atPPSlist = xfree(atPPSlist);
            return FALSE;
        }
    }

#if 0 /* defined(DEBUG) */
    DBG_MSG("Before");
    for (iIndex = 0; iIndex < (int)tNbrOfPPS; iIndex++) {
        DBG_MSG(atPPSlist[iIndex].szName);
        DBG_HEX(atPPSlist[iIndex].iDir);
        DBG_HEX(atPPSlist[iIndex].iPrev);
        DBG_HEX(atPPSlist[iIndex].iNext);
        DBG_DEC(atPPSlist[iIndex].lSb);
        DBG_HEX(atPPSlist[iIndex].lSize);
        DBG_DEC(atPPSlist[iIndex].iLevel);
    }
#endif /* DEBUG */

    /* Add level information to each entry */
    vComputePPSlevels(atPPSlist, &atPPSlist[iRootIndex], 0, 0);

    /* Check the entries on level 1 for the required information */
    NO_DBG_MSG("After");
    for (iIndex = 0; iIndex < (int)tNbrOfPPS; iIndex++) {
#if 0 /* defined(DEBUG) */
        DBG_MSG(atPPSlist[iIndex].szName);
        DBG_HEX(atPPSlist[iIndex].iDir);
        DBG_HEX(atPPSlist[iIndex].iPrev);
        DBG_HEX(atPPSlist[iIndex].iNext);
        DBG_DEC(atPPSlist[iIndex].lSb);
        DBG_HEX(atPPSlist[iIndex].lSize);
        DBG_DEC(atPPSlist[iIndex].iLevel);
#endif /* DEBUG */
        if (atPPSlist[iIndex].iLevel != 1 ||
            atPPSlist[iIndex].iType != 2 ||
            atPPSlist[iIndex].szName[0] == '\0' ||
            atPPSlist[iIndex].lSize <= 0) {
            continue;
        }
        /* Of each stream only the first one found is used */
        if (pPPS->tWordDocument.lSize <= 0 &&
            STREQ(atPPSlist[iIndex].szName, "WordDocument")) {
            pPPS->tWordDocument.lSb = atPPSlist[iIndex].lSb;
            pPPS->tWordDocument.lSize = atPPSlist[iIndex].lSize;
            bWord = TRUE;
        } else if (pPPS->tData.lSize <= 0 &&
               STREQ(atPPSlist[iIndex].szName, "Data")) {
            pPPS->tData.lSb = atPPSlist[iIndex].lSb;
            pPPS->tData.lSize = atPPSlist[iIndex].lSize;
        } else if (pPPS->t0Table.lSize <= 0 &&
               STREQ(atPPSlist[iIndex].szName, "0Table")) {
            pPPS->t0Table.lSb = atPPSlist[iIndex].lSb;
            pPPS->t0Table.lSize = atPPSlist[iIndex].lSize;
        } else if (pPPS->t1Table.lSize <= 0 &&
               STREQ(atPPSlist[iIndex].szName, "1Table")) {
            pPPS->t1Table.lSb = atPPSlist[iIndex].lSb;
            pPPS->t1Table.lSize = atPPSlist[iIndex].lSize;
        } else if (STREQ(atPPSlist[iIndex].szName, "Book") ||
               STREQ(atPPSlist[iIndex].szName, "Workbook")) {
            /* Only used to give a better error message */
            bExcel = TRUE;
        }
    }

    /* Free the space for the Property Set Storage entries */
    atPPSlist = xfree(atPPSlist);

    /* Draw your conclusions */
    if (bWord) {
        return TRUE;
    }
    if (bExcel) {
        werr(0, "Sorry, but this is an Excel spreadsheet");
    } else {
        werr(0, "This OLE file does not contain a Word document");
    }
    return FALSE;
} /* end of bGetPPS */
/*
 * vGetBbdList - make a list of the places to find big blocks
 *
 * pFile	the file to read from
 * iNbr		number of entries to read
 * alBbdList	receives the entries
 * lOffset	file position of the first entry
 *
 * The entries are four bytes each and lie next to each other in the file.
 * Nothing is read when iNbr is zero or less.
 */
static void
vGetBbdList(FILE *pFile, int iNbr, long *alBbdList, long lOffset)
{
    long lPosition;
    int iEntry;

    fail(pFile == NULL);
    fail(iNbr > 127);
    fail(alBbdList == NULL);
    fail(lOffset < 0);

    NO_DBG_DEC(iNbr);

    lPosition = lOffset;
    for (iEntry = 0; iEntry < iNbr; iEntry++) {
        alBbdList[iEntry] = (long)ulReadLong(pFile, lPosition);
        NO_DBG_DEC(iEntry);
        NO_DBG_HEX(alBbdList[iEntry]);
        lPosition += 4;
    }
} /* end of vGetBbdList */
/*
 * bGetDocumentText - make a list of the text blocks of a Word document
 *
 * pFile	the file to read from
 * pPPS		the streams found by bGetPPS
 * alBBD	the Big Block Depot and its length (tBBDLen)
 * alSBD	the Small Block Depot and its length (tSBDLen)
 * aucHeader	the header of the WordDocument stream
 * iWordVersion	the version of Word that made the document (6, 7 or 8)
 *
 * The status word at header offset 0x0a tells whether the document is
 * fast saved (bit 2) or encrypted (bit 8). The eight length fields are
 * four bytes each and follow each other; they start at header offset
 * 0x34 for Word 6 and 7 and at 0x4c for any other version.
 *
 * Return TRUE when successful, otherwise FALSE
 */
static BOOL
bGetDocumentText(FILE *pFile, const pps_info_type *pPPS,
    const long *alBBD, size_t tBBDLen,
    const long *alSBD, size_t tSBDLen,
    const unsigned char *aucHeader, int iWordVersion)
{
    long lBeginOfText;
    long lTextLen, lFootnoteLen, lEndnoteLen;
    long lHeaderLen, lMacroLen, lAnnotationLen;
    long lTextBoxLen, lHdrTextBoxLen;
    unsigned int uiQuickSaves;
    int iLenBase;
    BOOL bFarEastWord, bFastSaved, bEncrypted, bSuccess;
    unsigned short usDocStatus, usIdent;

    fail(pFile == NULL || pPPS == NULL);
    fail(alBBD == NULL);
    fail(alSBD == NULL);

    DBG_MSG("bGetDocumentText");

    /* The "magic number" identifies the Far East versions of Word */
    usIdent = usGetWord(0x00, aucHeader);
    DBG_HEX(usIdent);
    bFarEastWord = usIdent == 0x8098 || usIdent == 0x8099 ||
            usIdent == 0xa697 || usIdent == 0xa699;

    /* The status flags */
    usDocStatus = usGetWord(0x0a, aucHeader);
    DBG_HEX(usDocStatus);
    bFastSaved = (usDocStatus & BIT(2)) != 0;
    uiQuickSaves = (usDocStatus & 0x00f0) >> 4;
    DBG_MSG_C(bFastSaved, "This document is Fast Saved");
    DBG_DEC_C(bFastSaved, uiQuickSaves);
    bEncrypted = (usDocStatus & BIT(8)) != 0;
    if (bEncrypted) {
        werr(0, "Encrypted documents are not supported");
        return FALSE;
    }

    /* Where the text starts */
    lBeginOfText = (long)ulGetLong(0x18, aucHeader);
    DBG_HEX(lBeginOfText);

    /* The lengths are consecutive; only their starting point differs */
    if (iWordVersion == 6 || iWordVersion == 7) {
        iLenBase = 0x34;
    } else {
        iLenBase = 0x4c;
    }
    lTextLen = (long)ulGetLong(iLenBase, aucHeader);
    lFootnoteLen = (long)ulGetLong(iLenBase + 0x04, aucHeader);
    lHeaderLen = (long)ulGetLong(iLenBase + 0x08, aucHeader);
    lMacroLen = (long)ulGetLong(iLenBase + 0x0c, aucHeader);
    lAnnotationLen = (long)ulGetLong(iLenBase + 0x10, aucHeader);
    lEndnoteLen = (long)ulGetLong(iLenBase + 0x14, aucHeader);
    lTextBoxLen = (long)ulGetLong(iLenBase + 0x18, aucHeader);
    lHdrTextBoxLen = (long)ulGetLong(iLenBase + 0x1c, aucHeader);
    DBG_DEC(lTextLen);
    DBG_DEC(lFootnoteLen);
    DBG_DEC(lHeaderLen);
    DBG_DEC(lMacroLen);
    DBG_DEC(lAnnotationLen);
    DBG_DEC(lEndnoteLen);
    DBG_DEC(lTextBoxLen);
    DBG_DEC(lHdrTextBoxLen);

    /* Make a list of the text blocks */
    switch (iWordVersion) {
    case 6:
    case 7:
        if (!bFastSaved) {
            /* Normal saved: one run that holds all the parts */
            bSuccess = bAddTextBlocks(lBeginOfText,
                    lTextLen +
                    lFootnoteLen +
                    lHeaderLen + lMacroLen + lAnnotationLen +
                    lEndnoteLen +
                    lTextBoxLen + lHdrTextBoxLen,
                    bFarEastWord,
                    IGNORE_PROPMOD,
                    pPPS->tWordDocument.lSb,
                    alBBD, tBBDLen);
        } else {
            bSuccess = bGet6DocumentText(pFile,
                    bFarEastWord,
                    pPPS->tWordDocument.lSb,
                    alBBD, tBBDLen,
                    aucHeader);
        }
        break;
    case 8:
        bSuccess = bGet8DocumentText(pFile,
                pPPS,
                alBBD, tBBDLen, alSBD, tSBDLen,
                aucHeader);
        break;
    default:
        werr(0, "This version of Word is not supported");
        bSuccess = FALSE;
        break;
    }

    if (!bSuccess) {
        vDestroyTextBlockList();
        werr(0, "I can't find the text of this document");
        return bSuccess;
    }

    /* Divide the list over the different parts of the document */
    vSplitBlockList(lTextLen,
            lFootnoteLen,
            lHeaderLen + lMacroLen + lAnnotationLen,
            lEndnoteLen,
            lTextBoxLen + lHdrTextBoxLen,
            !bFastSaved && iWordVersion == 8);
    return bSuccess;
} /* end of bGetDocumentText */
/*
 * vGetDocumentData - make a list of the data blocks of a Word document
 *
 * pFile	the file to read from
 * pPPS		the streams found by bGetPPS
 * alBBD	the Big Block Depot and its length (tBBDLen)
 * aucHeader	the header of the WordDocument stream
 * iWordVersion	the version of Word that made the document (6, 7 or 8)
 *
 * The data block list is destroyed when no data blocks are needed and
 * also when making the list fails; a failure is reported with werr.
 */
static void
vGetDocumentData(FILE *pFile, const pps_info_type *pPPS,
    const long *alBBD, size_t tBBDLen,
    const unsigned char *aucHeader, int iWordVersion)
{
    options_type tOptions;
    long lBeginOfText;
    BOOL bFastSaved, bHasImages, bDataWanted, bSuccess;
    unsigned short usDocStatus;

    fail(pFile == NULL);
    fail(pPPS == NULL);
    fail(alBBD == NULL);

    /* The options decide whether images are wanted at all */
    vGetOptions(&tOptions);

    /* The status flags from the header */
    usDocStatus = usGetWord(0x0a, aucHeader);
    DBG_HEX(usDocStatus);
    bFastSaved = (usDocStatus & BIT(2)) != 0;
    bHasImages = (usDocStatus & BIT(3)) != 0;

    bDataWanted = bHasImages &&
            tOptions.bUseOutlineFonts &&
            tOptions.eImageLevel != level_no_images;
    if (!bDataWanted) {
        /*
         * No images in the document or text-only output or
         * no images wanted, so no data blocks will be needed
         */
        vDestroyDataBlockList();
        return;
    }

    /* Where the text starts */
    lBeginOfText = (long)ulGetLong(0x18, aucHeader);
    DBG_HEX(lBeginOfText);

    /* Make a list of the data blocks */
    if (iWordVersion == 6 || iWordVersion == 7) {
        /*
         * The data blocks are in the text stream. The text stream
         * is in "fast saved" format or "normal saved" format
         */
        if (!bFastSaved) {
            bSuccess = bAddDataBlocks(lBeginOfText, LONG_MAX,
                pPPS->tWordDocument.lSb, alBBD, tBBDLen);
        } else {
            bSuccess = bGet6DocumentData(pFile,
                    pPPS->tWordDocument.lSb,
                    alBBD, tBBDLen,
                    aucHeader);
        }
    } else if (iWordVersion == 8) {
        /*
         * The data blocks are in the data stream. The data stream
         * is always in "normal saved" format
         */
        bSuccess = bAddDataBlocks(0, LONG_MAX,
                pPPS->tData.lSb, alBBD, tBBDLen);
    } else {
        werr(0, "This version of Word is not supported");
        bSuccess = FALSE;
    }

    if (!bSuccess) {
        vDestroyDataBlockList();
        werr(0, "I can't find the data of this document");
    }
} /* end of vGetDocumentData */
/*
 * iInitDocument - initialize the document
 *
 * pFile	the file that holds the document
 * lFilesize	the size of that file in bytes
 *
 * Reads the header fields of the file, builds the Big Block Depot, the
 * Small Block Depot and the root list, looks up the streams with bGetPPS,
 * reads the header of the WordDocument stream and finally collects the
 * text, the data and the other information of the document.
 *
 * The depot lists are handed to bGetBBD and bGetSBD with the number of
 * entries that were really filled in, not with their allocated size, so
 * a list that ends too early in a damaged file is never read beyond its
 * last valid entry.
 *
 * All local lists are released with FREE_ALL before returning.
 *
 * Returns the version of Word that made the document or -1
 */
int
iInitDocument(FILE *pFile, long lFilesize)
{
    pps_info_type PPS_info;
    long *alBBD, *alSBD;
    long *alRootList, *alBbdList, *alSbdList;
    long lRootStartblock, lSbdStartblock, lSBLstartblock;
    long lAdditionalBBDlist;
    long lBdbListStart, lMaxBlock, lTmp;
    size_t tBBDLen, tSBDLen, tNumBbdBlocks, tRootListLen;
    size_t tBbdListLen, tSbdListLen;
    int iWordVersion, iIndex, iStart, iToGo;
    int iMaxSmallBlock;
    BOOL bSuccess;
    unsigned short usIdent;
    unsigned char aucHeader[HEADER_SIZE];

    fail(pFile == NULL);

    /* The first big block of the file is not counted as a block */
    lMaxBlock = lFilesize / BIG_BLOCK_SIZE - 2;
    DBG_DEC(lMaxBlock);
    if (lMaxBlock < 1) {
        return -1;
    }
    tBBDLen = (size_t)(lMaxBlock + 1);
    tNumBbdBlocks = (size_t)ulReadLong(pFile, 0x2c);
    DBG_DEC(tNumBbdBlocks);
    lRootStartblock = (long)ulReadLong(pFile, 0x30);
    DBG_DEC(lRootStartblock);
    lSbdStartblock = (long)ulReadLong(pFile, 0x3c);
    DBG_DEC(lSbdStartblock);
    lAdditionalBBDlist = (long)ulReadLong(pFile, 0x44);
    DBG_DEC(lAdditionalBBDlist);
    DBG_HEX(lAdditionalBBDlist);
    /* Start block and size are taken from the first root list entry */
    lSBLstartblock = (long)ulReadLong(pFile,
            (lRootStartblock + 1) * BIG_BLOCK_SIZE + 0x74);
    DBG_DEC(lSBLstartblock);
    iMaxSmallBlock = (int)(ulReadLong(pFile,
            (lRootStartblock + 1) *
            BIG_BLOCK_SIZE + 0x78) / SMALL_BLOCK_SIZE) - 1;
    DBG_DEC(iMaxSmallBlock);
    tSBDLen = (size_t)(iMaxSmallBlock + 1);

    /* All to be xmalloc-ed pointers to NULL */
    alRootList = NULL;
    alSbdList = NULL;
    alBbdList = NULL;
    alSBD = NULL;
    alBBD = NULL;

    /* Big Block Depot */
    alBbdList = xmalloc(tNumBbdBlocks * sizeof(long));
    alBBD = xmalloc(tBBDLen * sizeof(long));
    /* The first 109 entries of the list are at file offset 0x4c */
    iToGo = (int)tNumBbdBlocks;
    vGetBbdList(pFile, min(iToGo, 109), alBbdList, 0x4c);
    iStart = 109;
    iToGo -= 109;
    /* The others come in blocks of 127 plus a link to the next block */
    while (lAdditionalBBDlist != END_OF_CHAIN && iToGo > 0) {
        lBdbListStart = (lAdditionalBBDlist + 1) * BIG_BLOCK_SIZE;
        vGetBbdList(pFile, min(iToGo, 127),
                alBbdList + iStart, lBdbListStart);
        lAdditionalBBDlist = (long)ulReadLong(pFile,
                    lBdbListStart + 4 * 127);
        DBG_DEC(lAdditionalBBDlist);
        DBG_HEX(lAdditionalBBDlist);
        iStart += 127;
        iToGo -= 127;
    }
    /*
     * When the chain of additional blocks ended before all the
     * entries were read, only the first iStart entries are valid
     */
    tBbdListLen = iToGo > 0 ? (size_t)iStart : tNumBbdBlocks;
    if (!bGetBBD(pFile, alBbdList, tBbdListLen, alBBD, tBBDLen)) {
        FREE_ALL();
        return -1;
    }
    alBbdList = xfree(alBbdList);

    /* Small Block Depot */
    alSbdList = xmalloc(tBBDLen * sizeof(long));
    alSBD = xmalloc(tSBDLen * sizeof(long));
    for (iIndex = 0, lTmp = lSbdStartblock;
         iIndex < (int)tBBDLen && lTmp != END_OF_CHAIN;
         iIndex++, lTmp = alBBD[lTmp]) {
        if (lTmp < 0 || lTmp >= (long)tBBDLen) {
            DBG_DEC(lTmp);
            DBG_DEC(tBBDLen);
            werr(1, "The Big Block Depot is damaged");
        }
        alSbdList[iIndex] = lTmp;
        NO_DBG_HEX(alSbdList[iIndex]);
    }
    /* Only the entries that the chain provided have a value */
    tSbdListLen = (size_t)iIndex;
    if (!bGetSBD(pFile, alSbdList, tSbdListLen, alSBD, tSBDLen)) {
        FREE_ALL();
        return -1;
    }
    alSbdList = xfree(alSbdList);

    /* Root list, first count the blocks and then store them */
    for (tRootListLen = 0, lTmp = lRootStartblock;
         tRootListLen < tBBDLen && lTmp != END_OF_CHAIN;
         tRootListLen++, lTmp = alBBD[lTmp]) {
        if (lTmp < 0 || lTmp >= (long)tBBDLen) {
            DBG_DEC(lTmp);
            DBG_DEC(tBBDLen);
            werr(1, "The Big Block Depot is damaged");
        }
    }
    if (tRootListLen == 0) {
        werr(0, "No Rootlist found");
        FREE_ALL();
        return -1;
    }
    alRootList = xmalloc(tRootListLen * sizeof(long));
    for (iIndex = 0, lTmp = lRootStartblock;
         iIndex < (int)tBBDLen && lTmp != END_OF_CHAIN;
         iIndex++, lTmp = alBBD[lTmp]) {
        if (lTmp < 0 || lTmp >= (long)tBBDLen) {
            DBG_DEC(lTmp);
            DBG_DEC(tBBDLen);
            werr(1, "The Big Block Depot is damaged");
        }
        alRootList[iIndex] = lTmp;
        NO_DBG_DEC(alRootList[iIndex]);
    }
    fail(tRootListLen != (size_t)iIndex);

    bSuccess = bGetPPS(pFile, alRootList, tRootListLen, &PPS_info);
    alRootList = xfree(alRootList);
    if (!bSuccess) {
        FREE_ALL();
        return -1;
    }

    /* Small block list */
    if (!bCreateSmallBlockList(lSBLstartblock, alBBD, tBBDLen)) {
        FREE_ALL();
        return -1;
    }

    if (PPS_info.tWordDocument.lSize < MIN_SIZE_FOR_BBD_USE) {
        DBG_DEC(PPS_info.tWordDocument.lSize);
        FREE_ALL();
        werr(0, "I'm afraid the text stream of this file "
            "is too small to handle.");
        return -1;
    }

    /* Read the headerblock */
    if (!bReadBuffer(pFile, PPS_info.tWordDocument.lSb,
            alBBD, tBBDLen, BIG_BLOCK_SIZE,
            aucHeader, 0, HEADER_SIZE)) {
        FREE_ALL();
        return -1;
    }
    usIdent = usGetWord(0x00, aucHeader);
    DBG_HEX(usIdent);
    fail(usIdent != 0x8098 &&	/* Word 7 for oriental languages */
         usIdent != 0x8099 &&	/* Word 7 for oriental languages */
         usIdent != 0xa5dc &&	/* Word 6 & 7 */
         usIdent != 0xa5ec &&	/* Word 7 & 97 & 98 */
         usIdent != 0xa697 &&	/* Word 7 for oriental languages */
         usIdent != 0xa699);	/* Word 7 for oriental languages */
    iWordVersion = iGetVersionNumber(aucHeader);
    if (iWordVersion < 6) {
        FREE_ALL();
        werr(0, "This file is from a version of Word before Word 6.");
        return -1;
    }

    bSuccess = bGetDocumentText(pFile, &PPS_info,
            alBBD, tBBDLen, alSBD, tSBDLen,
            aucHeader, iWordVersion);
    if (bSuccess) {
        /* The other information is only of use when there is text */
        vGetDocumentData(pFile, &PPS_info,
            alBBD, tBBDLen, aucHeader, iWordVersion);
        vSetDefaultTabWidth(pFile, &PPS_info,
            alBBD, tBBDLen, alSBD, tSBDLen,
            aucHeader, iWordVersion);
        vGetPropertyInfo(pFile, &PPS_info,
            alBBD, tBBDLen, alSBD, tSBDLen,
            aucHeader, iWordVersion);
        vGetNotesInfo(pFile, &PPS_info,
            alBBD, tBBDLen, alSBD, tSBDLen,
            aucHeader, iWordVersion);
    }
    FREE_ALL();
    return bSuccess ? iWordVersion : -1;
} /* end of iInitDocument */
/*
 * vFreeDocument - free a document by free-ing its parts
 *
 * Calls the destroy function of every list and table of the document,
 * one after the other. Takes no arguments and returns nothing.
 */
void
vFreeDocument(void)
{
DBG_MSG("vFreeDocument");
/* Free the memory */
/* The lists of text blocks and data blocks */
vDestroyTextBlockList();
vDestroyDataBlockList();
/* The lists with row, style, font and picture information */
vDestroyRowInfoList();
vDestroyStyleInfoList();
vDestroyFontInfoList();
vDestroyPicInfoList();
/* The remaining lists and the font table */
vDestroyPropModList();
vDestroyNotesInfoLists();
vDestroyFontTable();
} /* end of vFreeDocument */
/*
 * bCheckBytes - common part of the file checking functions
 *
 * pFile	the file to check
 * aucBytes	the bytes the file should start with
 * tBytes	the number of bytes in aucBytes
 *
 * The file is rewound first. It is left positioned just behind the last
 * byte that was read.
 *
 * Returns TRUE when the file starts with the given bytes, FALSE when a
 * byte differs or the file ends too early
 */
static BOOL
bCheckBytes(FILE *pFile, const unsigned char *aucBytes, size_t tBytes)
{
    int iIndex, iChar;

    fail(pFile == NULL || aucBytes == NULL || tBytes == 0);

    rewind(pFile);

    iIndex = 0;
    while (iIndex < (int)tBytes) {
        iChar = getc(pFile);
        if (iChar != EOF && iChar == (int)aucBytes[iIndex]) {
            /* So far so good, on to the next byte */
            iIndex++;
            continue;
        }
        DBG_HEX(iChar);
        DBG_HEX(aucBytes[iIndex]);
        return FALSE;
    }
    return TRUE;
} /* end of bCheckBytes */
/*
 * bIsSupportedWordFile - check for a Word6 (or later) document
 *
 * pFile	the file to check
 * lFilesize	the size of that file in bytes
 *
 * Three things are checked: the size is at least three big blocks, the
 * size is a whole number of big blocks and the file starts with the eight
 * signature bytes listed below. The contents are not looked at any further.
 *
 * Returns TRUE when all checks pass, otherwise FALSE
 */
BOOL
bIsSupportedWordFile(FILE *pFile, long lFilesize)
{
    static unsigned char aucSignature[] =
        { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 };

    if (pFile == NULL || lFilesize < 0) {
        DBG_MSG("No proper file given");
        return FALSE;
    }
    if (lFilesize >= (long)BIG_BLOCK_SIZE * 3 &&
        lFilesize % BIG_BLOCK_SIZE == 0) {
        /* The size is fine, now the signature */
        return bCheckBytes(pFile,
                aucSignature, elementsof(aucSignature));
    }
    DBG_DEC(lFilesize);
    DBG_MSG("File size mismatch");
    return FALSE;
} /* end of bIsSupportedWordFile */
/*
 * bIsWord245File - check for a "Word2, 4, 5" document
 *
 * pFile	the file to check
 *
 * The start of the file is compared with six known signatures of eight
 * bytes each, in the order of the table below.
 *
 * Returns TRUE at the first signature that matches, FALSE if none does
 */
BOOL
bIsWord245File(FILE *pFile)
{
    static unsigned char aucSignatures[6][8] = {
        { 0x31, 0xbe, 0x00, 0x00, 0x00, 0xab, 0x00, 0x00 },
        { 0xdb, 0xa5, 0x2d, 0x00, 0x00, 0x00, 0x09, 0x04 },
        { 0xdb, 0xa5, 0x2d, 0x00, 0x31, 0x40, 0x09, 0x08 },
        { 0xdb, 0xa5, 0x2d, 0x00, 0x31, 0x40, 0x09, 0x0c },
        { 0xfe, 0x37, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x00 },
        { 0xfe, 0x37, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00 },
    };
    int iCandidate;

    DBG_MSG("bIsWord245File");

    iCandidate = 0;
    while (iCandidate < (int)elementsof(aucSignatures)) {
        if (bCheckBytes(pFile,
                aucSignatures[iCandidate],
                elementsof(aucSignatures[iCandidate]))) {
            return TRUE;
        }
        iCandidate++;
    }
    return FALSE;
} /* end of bIsWord245File */
/*
 * bIsRtfFile - check for a RTF document
 *
 * pFile	the file to check
 *
 * Returns TRUE when the file starts with the six characters {\rtf1,
 * otherwise FALSE
 */
BOOL
bIsRtfFile(FILE *pFile)
{
    static unsigned char aucRtfStart[] =
        { '{', '\\', 'r', 't', 'f', '1' };
    BOOL bMatch;

    DBG_MSG("bIsRtfFile");

    bMatch = bCheckBytes(pFile, aucRtfStart, elementsof(aucRtfStart));
    return bMatch;
} /* end of bIsRtfFile */