OS/2 Shareware BBS: 10 Tools

home *** CD-ROM | disk | FTP | other *** search

/ OS/2 Shareware BBS: 10 Tools / 10-Tools.zip / xwplascr.zip / XWPL0208.ZIP / tools / h2i / h2i.cpp next >

Wrap

C/C++ Source or Header | 2002-07-21 | 95KB | 3,060 lines

/* *@@sourcefile h2i.cpp: * the one and only source file for h2i.exe. * * h2i is a replacement for HTML2IPF.CMD and now used * by the XWorkplace and WarpIN makefiles to convert * the various HTML sources in 001\ to IPF code, which * can then be fed into IPFC. * * Compared to HTML2IPF.CMD, h2i has the following * advantages: * * -- Even though it's fairly sloppy code, it's * MAGNITUDES faster. * * -- It can process C include files to allow for * character entity references. In those include * files, only #define statements are evaluated. * This can be used for string replacements. * * -- It supports IFDEF and IFNDEF blocks like this: * + <IFDEF some_c_definition> + Lots of text here, which can optionally + be ignored. + </IFDEF> * * -- It supports a RESID attribute to the HTML * tag to allow for setting a resid explicitly. * Together with the include facility, this * is very useful for the XWorkplace help file. * * -- This also fixes some minor formatting bugs * that HTML2IPF.CMD exhibited, mostly with * lists and PRE sections. * * Current limitations: * * -- This does not automatically convert GIFs or * JPEGs to OS/2 1.3 BMP files, like HTML2IPF.CMD * does. * * -- <HTML SUBLINKS> and <HTML NOSUBLINKS> are not * yet supported. * *@@header "h2i.h" *@@added V0.9.13 (2001-06-23) [umoeller] */ /* * Copyright (C) 2001-2002 Ulrich Möller. * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, in version 2 as it comes in the COPYING * file of the XFolder main distribution. * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. */ #define OS2EMX_PLAIN_CHAR #include <stdlib.h> #include <string.h> #include <stdio.h> #include <setjmp.h> #include <io.h> #define INCL_DOS #define INCL_DOSERRORS #include <os2.h> #include "setup.h" #include "bldlevel.h" #include "helpers\dosh.h" #include "helpers\except.h" #include "helpers\linklist.h" #include "helpers\standards.h" #include "helpers\stringh.h" #include "helpers\tree.h" #include "helpers\xstring.h" #include "h2i.h" #pragma info (nocnv) /* ****************************************************************** * * NLS strings * ********************************************************************/ PCSZ G_pcszResourcesOnInternetTitle = "Resources on the Internet", G_pcszResourcesOnInternetBody = "This chapter contains all external links referenced in this book. " "Each link contained herein is an Unified Resource Locator (URL) " "to a certain location on the Internet. Simply double-click on one " "of them to launch Netscape with the respective URL.", G_pcszClickBelow = "Click below to launch Netscape with this URL&colon."; /* ****************************************************************** * * Private declarations * ********************************************************************/ /* *@@ DEFINENODE: * represents a #define from a C header. */ typedef struct _DEFINENODE { TREE Tree; // Tree.ulKey has the (PSZ) identifier PSZ pszValue; // value without quotes ULONG ulValueLength; // strlen(pszValue) } DEFINENODE, *PDEFINENODE; /* *@@ ARTICLETREENODE: * represents one article to be worked on. * * This comes from one HTML file each and * will turn into an IPF article. */ typedef struct _ARTICLETREENODE { TREE Tree; // Tree.ulKey has the (PSZ) filename ULONG ulHeaderLevel; // IPF header level (1 if root file; // -1 if link for "Internet" section) ULONG ulResID; // the resid of this link BOOL fProcessed; // TRUE after this has been parsed; // if FALSE, everything below is // still undefined // after parsing, the following are valid: XSTRING strIPF; // mostly translated IPF source XSTRING strTitle; struct _ARTICLETREENODE *pFirstReferencedFrom; // misc data from HTML source LONG lGroup; PSZ pszWidth, pszXPos; BOOL fHidden; // sublinks, nosublinks BOOL fWritten; // TRUE after this article has been // written to the main buffer; this // is used when we sort out parent // articles so we won't write the // article twice } ARTICLETREENODE, *PARTICLETREENODE; #define LIST_UL 1 #define LIST_OL 2 #define LIST_DL 3 /* *@@ STATUS: * parser status. */ typedef struct _STATUS { // temp data for handlers PSZ pSource; // current source pointer; // with HandleTag: // in: points to '<' char, // out: should point to '>' char PSZ pNextClose; // ptr to next closing tag while in HandleTag // accumulated line length (for inserting line breaks automatically) ULONG ulLineLength; // current state of formatting BOOL fInHead, fItalics, fBold, fCode, fUnderlined, fInPre, fInTable, fJustHadSpace, fNeedsP; ULONG ulInLink; // 0: not in <A block // 1: in regular <A HREF= block, must be closed // 2: in special <A AUTO= block, must not be closed XSTRING strLinkTag; // if ulInLink > 0, current link tag (for // image link support); this has something // like // :link reftype=hd res=58. // V0.9.20 (2002-07-12) [umoeller] LINKLIST llListStack; // linklist abused as a stack for list tags; // the list item is simply a ULONG with a // LISTFL_* value ULONG ulDefinition; // if inside a <DL>, this is 1 if last item was // a DD BOOL fFatal; // if error is returned and this is TRUE, // processing is stopped; otherwise the // error is considered a warning only ULONG ulNestingIFDEFs, ulNestingIFNDEFs; } STATUS, *PSTATUS; /* ****************************************************************** * * Global variables * ********************************************************************/ TREE *G_DefinesTreeRoot; ULONG G_cDefines = 0; ULONG G_ulReplacements = 0; TREE *G_LinkIDsTreeRoot; LINKLIST G_llFiles2Process; ULONG G_ulVerbosity = 1; BOOL G_fNoMoveToRoot = FALSE; XSTRING G_strError, // string buffer for error msgs G_strCrashContext; /* ****************************************************************** * * misc * ********************************************************************/ /* *@@ Error: * * ulCategory is: * * -- 0: command line syntax error. * * -- 1: HTML syntax warning. * * -- 2: fatal error. */ VOID Error(ULONG ulCategory, const char *pcszFile, ULONG ulLine, const char *pcszFunction, const char *pcszFormat, // in: format string (like with printf) ...) // in: additional stuff (like with printf) { if (G_ulVerbosity <= 1) printf("\n"); if (G_ulVerbosity) printf("h2i (%s line %u, %s)", pcszFile, ulLine, pcszFunction); else printf("h2i"); if (pcszFormat) { va_list args; switch (ulCategory) { case 0: printf(": Error in command line syntax. "); break; case 1: printf(" HTML warning: "); break; case 2: printf(" error: "); break; } va_start(args, pcszFormat); vprintf(pcszFormat, args); va_end(args); } if (ulCategory == 0) printf("\nType 'h2i -h' for help.\n"); else printf("\n"); } #define H2I_HEADER "h2i V"BLDLEVEL_VERSION" ("__DATE__") (C) 2001-2002 Ulrich Möller" /* * PrintHeader: * * */ VOID PrintHeader(VOID) { printf(H2I_HEADER "\n"); printf(" Part of the XWorkplace package.\n"); printf(" This is free software under the GNU General Public Licence (GPL).\n"); printf(" Refer to the COPYING file in the XWorkplace installation dir for details.\n"); } /* *@@ Explain: * */ VOID Explain(const char *pcszFormat, // in: format string (like with printf) ...) // in: additional stuff (like with printf) { if (pcszFormat) { va_list args; printf("h2i error: "); va_start(args, pcszFormat); vprintf(pcszFormat, args); va_end(args); printf("\n"); } PrintHeader(); printf("h2i (for 'html to ipf') translates a large bunch of HTML files into a\n"); printf("single IPF file, which can then be fed into IBM's ipfc compiler.\n"); printf("Usage: h2i [-i<include>]... [-v[0|1|2|3]] [-s] [-r] <root.htm>\n"); printf("with:\n"); printf(" <root.htm> the HTML file to start with. This better link to other files.\n"); printf(" -i<include> include a C header, whose #define statements are parsed\n"); printf(" to allow for entity references (&ent;).\n"); printf(" -v[0|1|2|3] specify verbosity level. If not specified, defaults to 1;\n"); printf(" if -v only is specified, uses 2.\n"); printf(" -s show some statistics when done.\n"); printf(" -r show only <root.htm> at level 1 in the TOC; otherwise all\n"); printf(" second-level files are moved to level 1 as well.\n"); } /* *@@ fnCompareStrings: * tree comparison func (src\helpers\tree.c). * *@@added V0.9.9 (2001-04-07) [umoeller] */ int TREEENTRY fnCompareStrings(ULONG ul1, ULONG ul2) { return (strhicmp((const char *)ul1, (const char *)ul2)); } /* ****************************************************************** * * Parser helpers * ********************************************************************/ /* *@@ AddDefinition: * */ BOOL AddDefinition(PDEFINENODE p) { return (!treeInsert(&G_DefinesTreeRoot, NULL, (TREE*)p, fnCompareStrings)); } /* *@@ FindDefinition: * */ PDEFINENODE FindDefinition(const char *pcszIdentifier) { return ((PDEFINENODE)treeFind(G_DefinesTreeRoot, (ULONG)pcszIdentifier, fnCompareStrings)); } /* *@@ ResolveEntities: * */ APIRET ResolveEntities(PARTICLETREENODE pFile2Process, PXSTRING pstrSource) { if (G_cDefines) { // we have include files: PSZ p = pstrSource->psz; // extract the entity (between '&' and ';') while (p = strchr(p, '&')) { PSZ p2; if (!(p2 = strchr(p, ';'))) p++; else { // find the definition for the entity in // the global list; so make this null-terminated // first *p2 = '\0'; PDEFINENODE pDef = 0; if (pDef = FindDefinition(p + 1)) { if (G_ulVerbosity > 2) printf("\n found entity \"%s\" --> \"%s\"", p + 1, pDef->pszValue); *p2 = ';'; // replace the entity string with the // definition string ULONG ulPos = (p - pstrSource->psz), ulReplLen = pDef->ulValueLength; xstrrpl(pstrSource, // first ofs to replace: ulPos, // char count to replace: (p2 + 1) - p, // include ';' // replacement string: pDef->pszValue, ulReplLen); // string buffer might have changed, so // re-adjust source position p = pstrSource->psz + ulPos; G_ulReplacements++; } else { // not found: // if it's not one of the standards we'll // replace later, complain if ( (strcmp(p + 1, "gt")) && (strcmp(p + 1, "lt")) && (strcmp(p + 1, "amp")) ) { Error(1, __FILE__, __LINE__, __FUNCTION__, "Unknown entity \"%s;\" at line %d in file \"%s\".", p, strhCount(pstrSource->psz, '\n') + 1, (PSZ)pFile2Process->Tree.ulKey); // filename } *p2 = ';'; p = p2 + 1; } } } } } /* *@@ ConvertEscapes: * */ const char *ConvertEscapes(PXSTRING pstr) { const char *papszSources[] = { "&", "<", ">" }; const char *papszTargets[] = { "&.", "<", ">" }; ULONG ul; for (ul = 0; ul < ARRAYITEMCOUNT(papszSources); ul++) { ULONG ulOfs = 0; while (xstrFindReplaceC(pstr, &ulOfs, papszSources[ul], papszTargets[ul])) ; } } /* *@@ GetOrCreateArticle: * returns a link ID (resid) for the * specified filename. * * If this is called for the first time for * this file name, a new ID is created and * returned. Otherwise the existing one * is returned. * * Special flags for ulCurrentLevel: * * -- 0: create root article (will receive 1 then) * * -- -1: do not create if it doesn't exist. * * If a file is created, it will intially * carry a resid of -1. If the file has a * RESID attribute to the HTML tag, ParseFile * will reset the resid then. Later we go * through all files and check which ones * are still -1 and assign random values then. */ PARTICLETREENODE GetOrCreateArticle(const char *pcszFilename, ULONG ulCurrentLevel, PARTICLETREENODE pParent) { PARTICLETREENODE pMapping; // check if we have an article for this file name already if (pMapping = (PARTICLETREENODE)treeFind(G_LinkIDsTreeRoot, (ULONG)pcszFilename, fnCompareStrings)) // exists: return (pMapping); else if (ulCurrentLevel != -1) { // create new one if (pMapping = NEW(ARTICLETREENODE)) { ZERO(pMapping); pMapping->Tree.ulKey = (ULONG)strhdup(pcszFilename, NULL); pMapping->ulHeaderLevel = ulCurrentLevel + 1; pMapping->ulResID = -1; // for now pMapping->pFirstReferencedFrom = pParent; xstrInit(&pMapping->strIPF, 0); xstrInit(&pMapping->strTitle, 0); if (!treeInsert(&G_LinkIDsTreeRoot, NULL, (TREE*)pMapping, fnCompareStrings)) { // this is a new file... // if it's not an HTML link, check if this exists if (access(pcszFilename, 0)) { // file does not exist: if ( (strncmp(pcszFilename, "http://", 7)) && (strncmp(pcszFilename, "ftp://", 6)) && (strncmp(pcszFilename, "mailto:", 7)) ) { if (G_ulVerbosity) printf("\n Warning: Link from \"%s\" to \"%s\" was not found.", (pParent) ? (PSZ)pParent->Tree.ulKey // pszFilename, : "none", (PSZ)pMapping->Tree.ulKey); // pszFilename, } // do not try to load this file! pMapping->ulHeaderLevel = -1; // special flag } // since this is new, add it to the list of // things to be processed too lstAppendItem(&G_llFiles2Process, pMapping); return (pMapping); } } } return (0); } /* ****************************************************************** * * Tag handlers * ********************************************************************/ /* *@@ CheckP: * adds a :p. if this was markes as due * in STATUS. When we encounter in HTML, * we never insert :p. directly but only * set the flag in STATUS so we can do * additional checks, mostly for better * <LI> formatting. This gets called from * various locations before appending * other character data. */ VOID CheckP(PXSTRING pxstrIPF, PSTATUS pstat) // in/out: parser status { if (pstat->fNeedsP) { // we just had a tag previously: if (pstat->fInPre) xstrcatc(pxstrIPF, '\n'); else xstrcat(pxstrIPF, "\n:p.\n", 0); pstat->ulLineLength = 0; pstat->fJustHadSpace = TRUE; pstat->fNeedsP = FALSE; } } /* *@@ PushList: * pushes the specified list on the list stack. */ VOID PushList(PSTATUS pstat, // in/out: parser status ULONG ulList) // in: LIST_* flag { lstAppendItem(&pstat->llListStack, (PVOID)ulList); } /* *@@ CheckListTop: * returns TRUE if the top item on the list * stack matches the given LIST_* flag. */ BOOL CheckListTop(PSTATUS pstat, // in: status ULONG ulList) // in: LIST_* flag { PLISTNODE pNode; if (pNode = lstQueryLastNode(&pstat->llListStack)) if ((ULONG)pNode->pItemData == ulList) return (TRUE); return (FALSE); } /* *@@ PopList: * removes the top item from the list stack. */ VOID PopList(PSTATUS pstat) // in/out: parser status { lstRemoveNode(&pstat->llListStack, lstQueryLastNode(&pstat->llListStack)); } typedef PCSZ TAGHANDLER(PARTICLETREENODE pFile2Process, // in: file to process PXSTRING pxstrIPF, // out: IPF string PSTATUS pstat, // in/out: parser status BOOL fClosingTag, // in: TRUE == closing tag, FALSE == opening tag PSZ pszAttrs); // in: attributes typedef TAGHANDLER *PTAGHANDLER; /* *@@ HandleHTML: * */ PCSZ HandleHTML(PARTICLETREENODE pFile2Process, PXSTRING pxstrIPF, // out: IPF string PSTATUS pstat, // in/out: parser status BOOL fClosingTag, // in: TRUE == closing tag, FALSE == opening tag PSZ pszAttrs) { if (!fClosingTag) { if (pszAttrs) { PSZ pszAttrib; if (pszAttrib = strhGetTextAttr(pszAttrs, "GROUP", NULL)) { pFile2Process->lGroup = atoi(pszAttrib); free(pszAttrib); } if (pszAttrib = strhGetTextAttr(pszAttrs, "WIDTH", NULL)) { pFile2Process->pszWidth = pszAttrib; } if (pszAttrib = strhGetTextAttr(pszAttrs, "XPOS", NULL)) { pFile2Process->pszXPos = pszAttrib; } if (pszAttrib = strhGetTextAttr(pszAttrs, "HIDDEN", NULL)) { pFile2Process->fHidden = TRUE; free(pszAttrib); } if (pszAttrib = strhGetTextAttr(pszAttrs, "RESID", NULL)) { pFile2Process->ulResID = atoi(pszAttrib); if (!pFile2Process->ulResID) return ("Invalid RESID in <HTML> tag."); free(pszAttrib); } if (pszAttrib = strhGetTextAttr(pszAttrs, "SUBLINKS", NULL)) { // @@todo } if (pszAttrib = strhGetTextAttr(pszAttrs, "NOSUBLINKS", NULL)) { // @@todo } } } return (0); } /* *@@ HandleHEAD: * *@@added V0.9.16 (2001-11-22) [umoeller] */ PCSZ HandleHEAD(PARTICLETREENODE pFile2Process, PXSTRING pxstrIPF, // out: IPF string PSTATUS pstat, // in/out: parser status BOOL fClosingTag, // in: TRUE == closing tag, FALSE == opening tag PSZ pszAttrs) { if (!fClosingTag) pstat->fInHead = TRUE; // this flag starts skipping all characters else pstat->fInHead = FALSE; // stop skipping all characters return 0; } /* *@@ HandleTITLE: * */ PCSZ HandleTITLE(PARTICLETREENODE pFile2Process, PXSTRING pxstrIPF, // out: IPF string PSTATUS pstat, // in/out: parser status BOOL fClosingTag, // in: TRUE == closing tag, FALSE == opening tag PSZ pszAttrs) { if (!fClosingTag) { // TITLE tag... // we need to remove whitespace from the // front and the tail, so play a bit here PSZ p3 = pstat->pNextClose + 1; while ( *p3 && ( (*p3 == ' ') || (*p3 == '\n') || (*p3 == '\t') ) ) p3++; if (!*p3) return ("Incomplete <TITLE>."); // find closing title tag PSZ p4 = strstr(p3, "</TITLE>"); if (!p4) return ("<TITLE> has no closing tag."); else { // from beginning of title, go backwards // until the last non-whitespace PSZ p5 = p4 - 1; while ( p5 > p3 && ( (*p5 == ' ') || (*p5 == '\n') || (*p5 == '\t') ) ) p5--; if (p5 > p3) { xstrcpy(&pFile2Process->strTitle, p3, p5 - p3 + 1); ResolveEntities(pFile2Process, &pFile2Process->strTitle); // search on after </TITLE> pstat->pNextClose = p4 + 7; } else return ("Empty <TITLE> block."); } } return (0); } /* *@@ HandleIFDEF: * */ PCSZ HandleIFDEF(PARTICLETREENODE pFile2Process, PXSTRING pxstrIPF, // out: IPF string PSTATUS pstat, // in/out: parser status BOOL fClosingTag, // in: TRUE == closing tag, FALSE == opening tag PSZ pszAttrs) { if (!fClosingTag) { // IFDEF tag... // get the define from the attributes PCSZ pDef = pszAttrs; while ( (*pDef) && (*pDef == ' ') ) pDef++; if (*pDef) { PDEFINENODE pNode; if (!(pNode = FindDefinition(pDef))) { // definition _not_ found: then we must _skip_ everything PSZ pEndOfSkip; if (pEndOfSkip = strstr(pstat->pNextClose + 1, "</IFDEF>")) { // search on after </IFDEF> if (G_ulVerbosity > 1) printf("\n<IFDEF '%s'> is FALSE, skipping %d chars", pDef, pEndOfSkip + 8 - pstat->pSource); pstat->pNextClose = pEndOfSkip + 7; } else return ("Cannot find closing </IFDEF> tag"); } else (pstat->ulNestingIFDEFs)++; } } else if (pstat->ulNestingIFDEFs) (pstat->ulNestingIFDEFs)--; else return ("Unrelated closing </IFDEF> tag"); return (0); } /* *@@ HandleIFNDEF: * */ PCSZ HandleIFNDEF(PARTICLETREENODE pFile2Process, PXSTRING pxstrIPF, // out: IPF string PSTATUS pstat, // in/out: parser status BOOL fClosingTag, // in: TRUE == closing tag, FALSE == opening tag PSZ pszAttrs) { if (!fClosingTag) { // IFNDEF tag... // get the define from the attributes PCSZ pDef = pszAttrs; while ( (*pDef) && (*pDef == ' ') ) pDef++; if (*pDef) { PDEFINENODE pNode; if (pNode = FindDefinition(pDef)) { // definition _found_: then we must _skip_ everything PSZ pEndOfSkip; if (pEndOfSkip = strstr(pstat->pNextClose + 1, "</IFNDEF>")) { // search on after </IFNDEF> if (G_ulVerbosity > 1) printf("\n<IFNDEF '%s'> is FALSE, skipping %d chars", pDef, pEndOfSkip + 9 - pstat->pSource); pstat->pNextClose = pEndOfSkip + 8; } else return ("Cannot find closing </IFNDEF> tag"); } else (pstat->ulNestingIFNDEFs)++; } } else if (pstat->ulNestingIFNDEFs) (pstat->ulNestingIFNDEFs)--; else return ("Unrelated closing </IFNDEF> tag"); return (0); } /* *@@ HandleP: * *@@added V0.9.16 (2001-11-22) [umoeller] */ PCSZ HandleP(PARTICLETREENODE pFile2Process, PXSTRING pxstrIPF, PSTATUS pstat, // in/out: parser status BOOL fClosingTag, // in: TRUE == closing tag, FALSE == opening tag PSZ pszAttrs) { if (!fClosingTag) // mark this only for now since we don't // know yet whether a <LI> comes next... // if we do <LI>, we get huge spaces pstat->fNeedsP = TRUE; return 0; } /* *@@ HandleBR: * *@@added V0.9.16 (2001-11-22) [umoeller] */ PCSZ HandleBR(PARTICLETREENODE pFile2Process, PXSTRING pxstrIPF, PSTATUS pstat, // in/out: parser status BOOL fClosingTag, // in: TRUE == closing tag, FALSE == opening tag PSZ pszAttrs) { if (!fClosingTag) { if (!pstat->fInTable) { // IPF cannot handle .br in tables xstrcat(pxstrIPF, "\n.br\n", 0); pstat->ulLineLength = 0; } } return 0; } /* *@@ HandleHR: * *@@added V0.9.16 (2001-11-22) [umoeller] */ PCSZ HandleHR(PARTICLETREENODE pFile2Process, PXSTRING pxstrIPF, PSTATUS pstat, // in/out: parser status BOOL fClosingTag, // in: TRUE == closing tag, FALSE == opening tag PSZ pszAttrs) { if (!fClosingTag) { ULONG ul; xstrcat(pxstrIPF, ":cgraphic.", 0); for (ul = 0; ul < 80; ul++) xstrcatc(pxstrIPF, '_'); xstrcat(pxstrIPF, ":ecgraphic.", 0); pstat->fNeedsP = TRUE; } return 0; } /* *@@ HandleI: * *@@added V0.9.16 (2001-11-22) [umoeller] */ PCSZ HandleI(PARTICLETREENODE pFile2Process, PXSTRING pxstrIPF, PSTATUS pstat, // in/out: parser status BOOL fClosingTag, // in: TRUE == closing tag, FALSE == opening tag PSZ pszAttrs) { if (!fClosingTag) { if (pstat->fItalics) return ("Nested or tag."); else { xstrcat(pxstrIPF, ":hp1.", 0); pstat->fItalics = TRUE; } } else { if (!pstat->fItalics) return ("Unrelated closing or tag."); else { xstrcat(pxstrIPF, ":ehp1.", 0); pstat->fItalics = FALSE; } } return 0; } /* *@@ HandleB: * *@@added V0.9.16 (2001-11-22) [umoeller] */ PCSZ HandleB(PARTICLETREENODE pFile2Process, PXSTRING pxstrIPF, PSTATUS pstat, // in/out: parser status BOOL fClosingTag, // in: TRUE == closing tag, FALSE == opening tag PSZ pszAttrs) { if (!fClosingTag) { if (pstat->fBold) return ("Nested or tag."); else { xstrcat(pxstrIPF, ":hp2.", 0); pstat->fBold = TRUE; } } else { if (!pstat->fBold) return ("Unrelated closing or tag."); else { xstrcat(pxstrIPF, ":ehp2.", 0); pstat->fBold = FALSE; } } return 0; } /* *@@ HandleU: * *@@added V0.9.16 (2001-11-22) [umoeller] */ PCSZ HandleU(PARTICLETREENODE pFile2Process, PXSTRING pxstrIPF, PSTATUS pstat, // in/out: parser status BOOL fClosingTag, // in: TRUE == closing tag, FALSE == opening tag PSZ pszAttrs) { if (!fClosingTag) { if (pstat->fUnderlined) return ("Nested tag."); else { xstrcat(pxstrIPF, ":hp5.", 0); pstat->fUnderlined = TRUE; } } else { if (!pstat->fUnderlined) return ("Unrelated closing tag."); else { xstrcat(pxstrIPF, ":ehp5.", 0); pstat->fUnderlined = FALSE; } } return 0; } /* *@@ HandleCODE: * *@@added V0.9.16 (2001-11-22) [umoeller] */ PCSZ HandleCODE(PARTICLETREENODE pFile2Process, PXSTRING pxstrIPF, PSTATUS pstat, // in/out: parser status BOOL fClosingTag, // in: TRUE == closing tag, FALSE == opening tag PSZ pszAttrs) { if (!fClosingTag) { if (pstat->fCode) return ("Nested <CODE>, <CITE>, or <TT> tag."); else { xstrcat(pxstrIPF, ":font facename='Courier' size=18x12.", 0); pstat->fCode = TRUE; } } else { if (!pstat->fCode) return ("Unrelated closing </CODE>, </CITE>, or </TT> tag."); else { xstrcat(pxstrIPF, ":font facename=default size=0x0.", 0); pstat->fCode = FALSE; } } return 0; } /* *@@ HandlePRE: * *@@added V0.9.16 (2001-11-22) [umoeller] */ PCSZ HandlePRE(PARTICLETREENODE pFile2Process, PXSTRING pxstrIPF, PSTATUS pstat, // in/out: parser status BOOL fClosingTag, // in: TRUE == closing tag, FALSE == opening tag PSZ pszAttrs) { if (!fClosingTag) { if (pstat->fInPre) return ("Nested <PRE> tag."); else { xstrcat(pxstrIPF, "\n:cgraphic.", 0); pstat->fInPre = TRUE; } } else { if (!pstat->fInPre) return ("Unrelated closing </PRE> tag."); else { xstrcat(pxstrIPF, "\n:ecgraphic.", 0); pstat->fInPre = FALSE; pstat->fNeedsP = TRUE; } } return 0; } /* *@@ HandleUL: * *@@added V0.9.16 (2001-11-22) [umoeller] */ PCSZ HandleUL(PARTICLETREENODE pFile2Process, PXSTRING pxstrIPF, PSTATUS pstat, // in/out: parser status BOOL fClosingTag, // in: TRUE == closing tag, FALSE == opening tag PSZ pszAttrs) { if (!fClosingTag) { xstrcat(pxstrIPF, "\n:ul compact.", 0); PushList(pstat, LIST_UL); pstat->ulLineLength = 0; } else { if (CheckListTop(pstat, LIST_UL)) { xstrcat(pxstrIPF, "\n:eul.", 0); PopList(pstat); pstat->fNeedsP = TRUE; } else return ("Invalid </UL> nesting."); } return 0; } /* *@@ HandleOL: * *@@added V0.9.16 (2001-11-22) [umoeller] */ PCSZ HandleOL(PARTICLETREENODE pFile2Process, PXSTRING pxstrIPF, PSTATUS pstat, // in/out: parser status BOOL fClosingTag, // in: TRUE == closing tag, FALSE == opening tag PSZ pszAttrs) { if (!fClosingTag) { xstrcat(pxstrIPF, "\n:ol compact.", 0); PushList(pstat, LIST_OL); pstat->ulLineLength = 0; } else { if (CheckListTop(pstat, LIST_OL)) { xstrcat(pxstrIPF, "\n:eol.", 0); PopList(pstat); pstat->fNeedsP = TRUE; } else return ("Invalid </OL> nesting."); } return 0; } /* *@@ HandleLI: * *@@added V0.9.16 (2001-11-22) [umoeller] */ PCSZ HandleLI(PARTICLETREENODE pFile2Process, PXSTRING pxstrIPF, PSTATUS pstat, // in/out: parser status BOOL fClosingTag, // in: TRUE == closing tag, FALSE == opening tag PSZ pszAttrs) { if (!fClosingTag) { if ( (CheckListTop(pstat, LIST_UL)) || (CheckListTop(pstat, LIST_OL)) ) { if (pstat->fNeedsP) { // we just had a previously: // add a .br instead or we'll get huge // whitespace xstrcat(pxstrIPF, "\n.br\n", 0); pstat->fNeedsP = FALSE; } xstrcat(pxstrIPF, ":li.\n", 0); pstat->ulLineLength = 0; } else return ("<LI> outside list."); } return 0; } /* *@@ HandleDL: * *@@added V0.9.16 (2001-11-22) [umoeller] */ PCSZ HandleDL(PARTICLETREENODE pFile2Process, PXSTRING pxstrIPF, PSTATUS pstat, // in/out: parser status BOOL fClosingTag, // in: TRUE == closing tag, FALSE == opening tag PSZ pszAttrs) { if (!fClosingTag) { xstrcat(pxstrIPF, "\n:dl compact break=all.", 0); PushList(pstat, LIST_DL); pstat->ulLineLength = 0; } else { if (CheckListTop(pstat, LIST_DL)) { if (!pstat->ulDefinition) // if the list didn't stop with a DD last, // we must add one or IPFC will choke xstrcat(pxstrIPF, "\n:dd.", 0); xstrcat(pxstrIPF, "\n:edl.", 0); PopList(pstat); pstat->fNeedsP = TRUE; pstat->ulDefinition = 0; } else return ("Invalid </DL> nesting."); } return 0; } /* *@@ HandleDD: * *@@added V0.9.16 (2001-11-22) [umoeller] */ PCSZ HandleDD(PARTICLETREENODE pFile2Process, PXSTRING pxstrIPF, PSTATUS pstat, // in/out: parser status BOOL fClosingTag, // in: TRUE == closing tag, FALSE == opening tag PSZ pszAttrs) { if (!fClosingTag) { if (!CheckListTop(pstat, LIST_DL)) return ("<DD> outside of <DL>."); else { xstrcat(pxstrIPF, "\n:dd.", 0); pstat->ulLineLength = 0; pstat->ulDefinition = 1; } } return 0; } /* *@@ HandleDT: * *@@added V0.9.16 (2001-11-22) [umoeller] */ PCSZ HandleDT(PARTICLETREENODE pFile2Process, PXSTRING pxstrIPF, PSTATUS pstat, // in/out: parser status BOOL fClosingTag, // in: TRUE == closing tag, FALSE == opening tag PSZ pszAttrs) { if (!fClosingTag) { if (!CheckListTop(pstat, LIST_DL)) return ("<DT> outside of <DL>."); else { xstrcat(pxstrIPF, "\n:dt.", 0); pstat->ulLineLength = 0; pstat->ulDefinition = 0; } } return 0; } #define START_KEY "@#!LINK1@#!" #define END_KEY "@#!LINK2@#!" /* *@@ HandleA: * *@@changed V0.9.20 (2002-07-06) [umoeller]: added support for cpref etc. toolkit INF variables +@@changed V0.9.20 (2002-07-12) [umoeller]: added support for A HREF="helpfile.hlp#panel" */ PCSZ HandleA(PARTICLETREENODE pFile2Process, PXSTRING pxstrIPF, PSTATUS pstat, // in/out: parser status BOOL fClosingTag, // in: TRUE == closing tag, FALSE == opening tag PSZ pszAttrs) { if (!fClosingTag) { if (pstat->ulInLink) return ("Nested <A ...> blocks."); else { // check the attribute: // is it HREF? if (pszAttrs) { PSZ pszAttrib; if (pszAttrib = strhGetTextAttr(pszAttrs, "AUTO", NULL)) { PARTICLETREENODE patn = GetOrCreateArticle(pszAttrib, pFile2Process->ulHeaderLevel, pFile2Process); // V0.9.20 (2002-07-12) [umoeller] xstrPrintf(&pstat->strLinkTag, // hack in the @#!LINK@#! string for now; // this is later replaced with the // resid in ParseFiles ":link reftype=hd res=" START_KEY "%s" END_KEY " auto dependent.", pszAttrib); xstrcats(pxstrIPF, &pstat->strLinkTag); pstat->ulInLink = 2; // special, do not close } else if (pszAttrib = strhGetTextAttr(pszAttrs, "HREF", NULL)) { PSZ pExt; if (pExt = strhistr(pszAttrib, ".INF")) { // special INF cross-link support: // V0.9.20 (2002-07-06) [umoeller] // remove .INF and the following # if any; // this allows us to specify "cpref.inf#func" // in the sources even though cpref is really // an environment variable defined by the // toolkit install PSZ p3 = pExt + 4; if (*p3 == '#') { *p3 = ' '; } strcpy(pExt, p3); // V0.9.20 (2002-07-12) [umoeller] xstrPrintf(&pstat->strLinkTag, ":link reftype=launch object='view.exe' data='%s'.", pszAttrib); xstrcats(pxstrIPF, &pstat->strLinkTag); } else if (pExt = strhistr(pszAttrib, ".HLP")) { // V0.9.20 (2002-07-06) [umoeller] // special HLP cross-link support: // syntax is "helpfile.hlp#panel" here PSZ p3 = pExt + 4; if (*p3 == '#') { *p3 = '\0'; // V0.9.20 (2002-07-12) [umoeller] xstrPrintf(&pstat->strLinkTag, ":link reftype=hd refid=%d database='%s'.", atoi(p3 + 1), pszAttrib); xstrcats(pxstrIPF, &pstat->strLinkTag); } else return ("Invalid syntax in <A HREF=\"file.hlp#panel\"."); } else { // no INF: PARTICLETREENODE patn = GetOrCreateArticle(pszAttrib, pFile2Process->ulHeaderLevel, pFile2Process); // V0.9.20 (2002-07-12) [umoeller] xstrPrintf(&pstat->strLinkTag, // hack in the @#!LINK@#! string for now; // this is later replaced with the // resid in ParseFiles ":link reftype=hd res=" START_KEY "%s" END_KEY ".", pszAttrib); xstrcats(pxstrIPF, &pstat->strLinkTag); } pstat->ulInLink = 1; // regular, do close free(pszAttrib); } else return ("Unknown attribute to <A> tag."); } else return ("<A> tag has no attributes."); } } // end if (!fClosingTag) else { if (pstat->ulInLink) { if (pstat->ulInLink == 1) xstrcat(pxstrIPF, ":elink.", 0); // otherwise 2: that's from A AUTO, do not close pstat->ulInLink = 0; } else return ("Unrelated closing </A> tag."); } return (0); } /* *@@ HandleIMG: * */ PCSZ HandleIMG(PARTICLETREENODE pFile2Process, PXSTRING pxstrIPF, PSTATUS pstat, // in/out: parser status BOOL fClosingTag, // in: TRUE == closing tag, FALSE == opening tag PSZ pszAttrs) { PSZ pszAttrib; if (pszAttrib = strhGetTextAttr(pszAttrs, "SRC", NULL)) { XSTRING str; xstrInitSet(&str, pszAttrib); PSZ p = strrchr(pszAttrib, '.'); if ( (!p) || (strlen(p) < 3) ) xstrReserve(&str, CCHMAXPATH); strcpy(p, ".bmp"); if (access(str.psz, 0)) // doesn't exist: Error(1, __FILE__, __LINE__, __FUNCTION__, "The bitmap file \"%s\" was not found.", str.psz); if (pstat->ulInLink == 1) // if we are inside a link, close it first // V0.9.20 (2002-07-12) [umoeller] xstrcat(pxstrIPF, ":elink.", 0); if (pstat->fNeedsP) // if we had a just before, do not use runin xstrCatf(pxstrIPF, "\n:artwork name='%s' align=left.", str.psz); else xstrCatf(pxstrIPF, "\n:artwork name='%s' runin align=left.", str.psz); // if we are inside a link, add special tag // :artwork name='img/xfwps_mini.bmp' runin align=left. // :artlink.:link reftype=hd res=58.:eartlink. // V0.9.20 (2002-07-12) [umoeller] if (pstat->ulInLink) { xstrCatf(pxstrIPF, "\n:artlink.\n%s\n:eartlink.\n", pstat->strLinkTag.psz); // e.g. ":link reftype=hd res=58." // do not add another :elink. pstat->ulInLink = 2; } xstrClear(&str); pstat->fNeedsP = FALSE; } else return ("No SRC attribute to <IMG> tag."); return (0); } typedef struct _HANDLERDEF { PCSZ pcszTag; PTAGHANDLER pHandler; } HANDLERDEF, *PHANDLERDEF; typedef struct _HANDLERTREENODE { TREE Tree; // ulkey is used for (ULONG)pcszTag PTAGHANDLER pHandler; } HANDLERTREENODE, *PHANDLERTREENODE; /* *@@ HandlersList: * array of all tags we understand with the * handler func that handles the tag, or NULL * if the tag is to be silently ignored. * * We build a string map from this list for * fast lookup in main(). * *@@added V0.9.16 (2001-11-22) [umoeller] */ HANDLERDEF HandlersList[] = { "P", HandleP, "BR", HandleBR, "LI", HandleLI, "IMG", HandleIMG, "HR", HandleHR, "I", HandleI, "EM", HandleI, "B", HandleB, "STRONG", HandleB, "U", HandleU, "CODE", HandleCODE, "CITE", HandleCODE, "TT", HandleCODE, "A", HandleA, "UL", HandleUL, "OL", HandleOL, "DL", HandleDL, "DT", HandleDT, "DD", HandleDD, "HEAD", HandleHEAD, "PRE", HandlePRE, "HTML", HandleHTML, "TITLE", HandleTITLE, "IFDEF", HandleIFDEF, "IFNDEF", HandleIFNDEF, "BODY", NULL, // ignore "H1", NULL, // ignore "H2", NULL, // ignore "H3", NULL, // ignore "H4", NULL, // ignore "H5", NULL, // ignore "H6", NULL // ignore }; TREE *G_HandlersTreeRoot; /* *@@ HandleTag: * HTML tag handler. Gets called from ParseFile * whenever a '<' character is found in the HTML * sources. We then look up the tag name in the * string map we built in main() and call the * respective handler, if one exists. * *@@changed V0.9.16 (2001-11-22) [umoeller]: major speedup with tag handlers string map */ const char* HandleTag(PARTICLETREENODE pFile2Process, PSTATUS pstat) // in/out: parser status { const char *pcszError = NULL; PXSTRING pxstrIPF = &pFile2Process->strIPF; PSZ pStartOfTagName = pstat->pSource + 1; // is this a comment? if (!strncmp(pStartOfTagName, "!--", 3)) { // start of comment: // find end of comment PSZ pEnd; if (pEnd = strstr(pStartOfTagName + 3, "-->")) { // found: // search on after end of comment pstat->pSource = pEnd + 2; } else { // end of comment not found: // stop formatting... pcszError = "Terminating comment not found."; pstat->fFatal = TRUE; } } else { // no comment: // find end of tag PSZ p2 = pStartOfTagName, // pNextClose = 0, // receives first '>' after '<' pNextSpace = 0; // receives first ' ' after '<' BOOL fCont = TRUE; pstat->pNextClose = 0; while (fCont) { switch (*p2) { case ' ': case '\r': case '\n': case '\t': // store first space after '<' if (!pNextSpace) pNextSpace = p2; *p2 = ' '; break; case '>': // end of tag found: pstat->pNextClose = p2; fCont = FALSE; break; case '<': // another opening tag: // that's an HTML error return ("Opening < within another tag found."); break; case 0: fCont = FALSE; break; } p2++; } if (pstat->pNextClose) { // end of tag found: ULONG cbTag; PSZ pszAttrs = NULL; if ((pNextSpace) && (pNextSpace < pstat->pNextClose)) { // we have attributes: cbTag = pNextSpace - pStartOfTagName; pszAttrs = strhSubstr(pNextSpace, pstat->pNextClose); } else cbTag = pstat->pNextClose - pStartOfTagName; if (!cbTag) { // happens if we have a "<>" in the text: // just insert the '<>' and go on, we have no tag here xstrcatc(pxstrIPF, *(pstat->pSource)++); xstrcatc(pxstrIPF, *(pstat->pSource)++); pstat->ulLineLength += 2; } else { PHANDLERTREENODE pHandlerTreeNode; PTAGHANDLER pHandler = NULL; BOOL fClosingTag = FALSE; // attributes come after this // and go up to pNextClose // add a null terminator so we can // use strcmp for testing for tags CHAR cSaved = *(pStartOfTagName + cbTag); *(pStartOfTagName + cbTag) = '\0'; // printf("'%s' ", pStartOfTagName); // tags which can be open or closed // (e.g. B or /B): PSZ pStart2 = pStartOfTagName; if (*pStart2 == '/') { // closing tag: fClosingTag = TRUE; // use next char for strcmp pStart2++; } // now handle tags if (pHandlerTreeNode = (PHANDLERTREENODE)treeFind(G_HandlersTreeRoot, (ULONG)pStart2, fnCompareStrings)) { // handler tree node found: if (pHandlerTreeNode->pHandler) // tag needs handling code: pcszError = pHandlerTreeNode->pHandler(pFile2Process, pxstrIPF, pstat, fClosingTag, pszAttrs); } else { xstrcpy(&G_strError, "", 0); xstrCatf(&G_strError, "Unknown tag %s (%s)", pStartOfTagName, pStart2); pcszError = G_strError.psz; } // restore char under null terminator *(pStartOfTagName + cbTag) = cSaved; // skip the tag, even if unknown pstat->pSource = pstat->pNextClose; if (pszAttrs) free(pszAttrs); } } // end if (pNextClose) else { xstrcatc(pxstrIPF, *(pstat->pSource)); // no closing tag found: // just return, caller will try next char // (probably no tag anyway) } } return (pcszError); } /* *@@ ParseFile: * converts the HTML code in the buffer to IPF. * * This is a complete conversion except: * * -- automatic resids will be assigned later, * unless this file has <HTML RESID=xxx> set; * * -- the :hX. tag will not be added here * (because it depends on sublinks). * * This adds files to G_llFiles2Process if they * are linked to from this file by calling * GetOrCreateArticle. */ APIRET ParseFile(PARTICLETREENODE pFile2Process, PSZ pszBuf) { PXSTRING pxstrIPF = &pFile2Process->strIPF; STATUS stat = {0}; lstInit(&stat.llListStack, FALSE); xstrInit(&stat.strLinkTag, 0); // V0.9.20 (2002-07-12) [umoeller] stat.fJustHadSpace = TRUE; // start at beginning of buffer stat.pSource = pszBuf; while (TRUE) { const char *pcszError = NULL; CHAR c = *(stat.pSource); PSZ pSaved = stat.pSource; switch (c) { case '<': pcszError = HandleTag(pFile2Process, &stat); break; case ':': if (!stat.fInHead) { CheckP(pxstrIPF, &stat); xstrcat(pxstrIPF, "&colon.", 7); stat.ulLineLength += 7; } break; case '.': if (!stat.fInHead) { CheckP(pxstrIPF, &stat); xstrcat(pxstrIPF, "&per.", 5); stat.ulLineLength += 5; } break; case '\r': // just skip this break; case '\n': case ' ': // throw out all line breaks and spaces // unless we're in PRE mode if (stat.fInPre) xstrcatc(pxstrIPF, c); else { CheckP(pxstrIPF, &stat); // add a space instead, if we didn't just have one // dump duplicate spaces if (!stat.fJustHadSpace) { // on spaces, check how long the line is already... if (stat.ulLineLength > 50) { xstrcatc(pxstrIPF, '\n'); stat.ulLineLength = 0; } else { xstrcatc(pxstrIPF, ' '); (stat.ulLineLength)++; } stat.fJustHadSpace = TRUE; } } break; case '\0': // null terminator reached: // some safety checks if (stat.fInHead) pcszError = "No closing </HEAD> tag found."; break; default: if (!stat.fInHead) { CheckP(pxstrIPF, &stat); xstrcatc(pxstrIPF, c); (stat.ulLineLength)++; stat.fJustHadSpace = FALSE; } } if (pcszError) { // get the line count CHAR c = *pSaved; *pSaved = 0; Error(1, __FILE__, __LINE__, __FUNCTION__, "file \"%s\", line %d:\n %s", (PSZ)pFile2Process->Tree.ulKey, // file name strhCount(pszBuf, '\n') + 1, pcszError); *pSaved = c; if (stat.fFatal) return (1); // otherwise continue } if (*(stat.pSource)) (stat.pSource)++; else break; } ConvertEscapes(&pFile2Process->strTitle); ConvertEscapes(pxstrIPF); return (0); } /* ****************************************************************** * * File processor * ********************************************************************/ /* *@@ AppendToMainBuffer: * */ VOID AppendToMainBuffer(PXSTRING pxstrIPF, PARTICLETREENODE pFile2Process, PXSTRING pstrExtras) // temp buffer for speed { if (pFile2Process->ulHeaderLevel == 1) { // root file: xstrCatf(pxstrIPF, ":title.%s\n", pFile2Process->strTitle.psz); } xstrCatf(pxstrIPF, "\n.* Source file: \"%s\"\n", (PSZ)pFile2Process->Tree.ulKey); // pszFilename); xstrcpy(pstrExtras, NULL, 0); if (pFile2Process->lGroup) xstrCatf(pstrExtras, " group=%d", pFile2Process->lGroup); if (pFile2Process->pszXPos) xstrCatf(pstrExtras, " x=%s", pFile2Process->pszXPos); if (pFile2Process->pszWidth) xstrCatf(pstrExtras, " width=%s", pFile2Process->pszWidth); if (pFile2Process->fHidden) xstrcpy(pstrExtras, " hide", 0); ULONG ulHeaderLevel = pFile2Process->ulHeaderLevel; // if this is not at root level already, // move down one level (unless -r is specified) if (!G_fNoMoveToRoot) if (ulHeaderLevel > 1) ulHeaderLevel--; xstrCatf(pxstrIPF, ":h%d res=%d%s.%s\n", ulHeaderLevel, pFile2Process->ulResID, (pstrExtras->ulLength) ? pstrExtras->psz : "", pFile2Process->strTitle.psz); xstrcat(pxstrIPF, ":p.\n", 0); // and the rest of the converted file xstrcats(pxstrIPF, &pFile2Process->strIPF); pFile2Process->fWritten = TRUE; } /* *@@ DumpArticlesWithParent: * */ VOID DumpArticlesWithParent(PXSTRING pxstrIPF, PARTICLETREENODE pParent, PXSTRING pstrExtras) { PLISTNODE pNode = lstQueryFirstNode(&G_llFiles2Process); while (pNode) { PARTICLETREENODE pFile2Process = (PARTICLETREENODE)pNode->pItemData; // rule out special links for now if ( (pFile2Process->ulHeaderLevel != -1) && (!pFile2Process->fWritten) ) { if (pFile2Process->pFirstReferencedFrom == pParent) { if (G_ulVerbosity > 1) printf("File \"%s\" has resid %d, %d bytes IPF\n", (PSZ)pFile2Process->Tree.ulKey, // pszFilename, pFile2Process->ulResID, pFile2Process->strIPF.ulLength); AppendToMainBuffer(pxstrIPF, pFile2Process, pstrExtras); // recurse with this node DumpArticlesWithParent(pxstrIPF, pFile2Process, pstrExtras); } // build a linked list of articles which belong // under this article /* PLISTNODE pSubnode = lstQueryFirstNode(&G_llFiles2Process); while (pSubnode) { PARTICLETREENODE pSub = (PARTICLETREENODE)pSubnode->pItemData; if ( (pSub->pFirstReferencedFrom == pFile2Process) && (pSub->ulHeaderLevel != -1) ) { if (G_ulVerbosity > 1) printf(" loop 2: Writing out %s\n", pSub->pszFilename); AppendToMainBuffer(pxstrIPF, pSub, &strExtras); // marks the file as "written" } pSubnode = pSubnode->pNext; } */ } // now for the next main node pNode = pNode->pNext; } } /* *@@ ProcessFiles: * loops through all files. * * When this is called (from main()), G_llFiles2Process * contains only the one file that was specified on * the command line. This then calls ParseFile() on * that file, which will add to the list for every * A tag that links to another file. * * After ParseFile() has been called for each such * file then, we run several additional loops here * for resid resolution and such. * * Finally, in the last loop, the IPF code from all * files is added to the one given XSTRING buffer, * which will then be written out to disk by the * caller (main()). */ APIRET ProcessFiles(PXSTRING pxstrIPF) // out: one huge IPF file { PLISTNODE pNode; APIRET arc = NO_ERROR; /* * loop 1: * */ if (G_ulVerbosity > 1) printf("Reading source files...\n"); // go thru the list of files to process and // translate them from HTML to IPF... // when this func gets called, this list // contains only the root file, but ParseFile // will add new files to the list, so we just // keep going XSTRING strSource; xstrInit(&strSource, 0); pNode = lstQueryFirstNode(&G_llFiles2Process); while (pNode) { PARTICLETREENODE pFile2Process = (PARTICLETREENODE)pNode->pItemData; // rule out special links for now if (pFile2Process->ulHeaderLevel != -1) { PSZ pszContents = NULL; if (G_ulVerbosity > 1) { printf("processing %s... ", (PSZ)pFile2Process->Tree.ulKey); // pszFilename); fflush(stdout); } else if (G_ulVerbosity) { printf("."); fflush(stdout); } xstrcpy(&G_strCrashContext, "", 0); xstrCatf(&G_strCrashContext, "Processing file %s", (PSZ)pFile2Process->Tree.ulKey); ULONG cbRead = 0; if (!(arc = doshLoadTextFile((PSZ)pFile2Process->Tree.ulKey, // pszFilename, &pszContents, &cbRead))) { xstrset2(&strSource, pszContents, cbRead - 1); xstrConvertLineFormat(&strSource, CRLF2LF); ResolveEntities(pFile2Process, &strSource); // now go convert this buffer to IPF if (!(arc = ParseFile(pFile2Process, strSource.psz))) { } pFile2Process->fProcessed = TRUE; if (G_ulVerbosity > 1) printf("\n"); } if (arc) { Error(2, __FILE__, __LINE__, __FUNCTION__, "Error %d occured reading file \"%s\".", arc, (PSZ)pFile2Process->Tree.ulKey); // pszFilename); arc = 1; } } if (arc) break; // next file (ParseFile has added files to list) pNode = pNode->pNext; } if (!arc) { /* * loop 2: * */ // go through the entire file list again // and assign an automatic resid if this // has not been set yet if (G_ulVerbosity > 1) printf("Assigning automatic resids...\n"); ULONG ulNextResID = 10000; pNode = lstQueryFirstNode(&G_llFiles2Process); while (pNode) { PARTICLETREENODE pFile2Process = (PARTICLETREENODE)pNode->pItemData; // process special links too if (pFile2Process->ulResID == -1) pFile2Process->ulResID = ulNextResID++; pNode = pNode->pNext; } if (G_ulVerbosity > 1) printf(" Done, %d resids assigned (%d files had explicit resids)\n", (ulNextResID - 10000), lstCountItems(&G_llFiles2Process) - (ulNextResID - 10000)); /* * loop 3: * */ if (G_ulVerbosity > 1) printf("Replacing link strings...\n"); // go through the entire file list again // and in each IPF buffer, replace the // special link strings with the good resids, // which are known by now ULONG ulStartKeyLen = strlen(START_KEY), ulEndKeyLen = strlen(END_KEY); pNode = lstQueryFirstNode(&G_llFiles2Process); while (pNode && !arc) { PARTICLETREENODE pFile2Process = (PARTICLETREENODE)pNode->pItemData; // rule out special links for now if (pFile2Process->ulHeaderLevel != -1) { PSZ pStart; if (pStart = pFile2Process->strIPF.psz) // could be empty V0.9.16 (2001-11-22) [umoeller] { PSZ p; // now, find the special ugly link keys // we hacked in before; for each string // target filename now, find the resid while (p = (strstr(pStart, START_KEY))) { PSZ pFirst = pFile2Process->strIPF.psz; PSZ p2 = strstr(p + ulStartKeyLen, END_KEY); if (!p2) { Error(2, __FILE__, __LINE__, __FUNCTION__, "Cannot find second LINK string for \"%s\"\nFile: \"%s\"\n", p, (PSZ)pFile2Process->Tree.ulKey); // pszFilename); arc = 1; break; } CHAR cSaved = *p2; *p2 = '\0'; if (G_ulVerbosity > 2) printf(" encountered link \"%s\" at %d (len %d), searching resid\n", p + ulStartKeyLen, p - pFirst, p2 - pFirst); PARTICLETREENODE pTarget = GetOrCreateArticle(p + ulStartKeyLen, // filename -1, // do not create NULL); if (!pTarget) { Error(2, __FILE__, __LINE__, __FUNCTION__, "Cannot resolve cross reference for \"%s\"\nFile: \"%s\"\n", p + ulStartKeyLen, (PSZ)pFile2Process->Tree.ulKey); // pszFilename); arc = 1; break; } *p2 = cSaved; CHAR szResID[30]; sprintf(szResID, "%d", pTarget->ulResID); ULONG ulResIDLen = strlen(szResID); ULONG ulFirst = p - pFirst, cReplace = (p2 + ulStartKeyLen) - p; if (G_ulVerbosity > 2) printf(" ofs %d, cReplace %d, replacing with \"%s\"\n", ulFirst, cReplace, szResID); xstrrpl(&pFile2Process->strIPF, // ofs of first char to replace: ulFirst, // count of chars to replace: cReplace, // (p2 + KEYLEN) - p, // replace this with the resid szResID, ulResIDLen); pStart = pFile2Process->strIPF.psz + ulFirst; } } } pNode = pNode->pNext; } if (!arc) { XSTRING strExtras; xstrInit(&strExtras, 0); /* * loop 4: * */ if (G_ulVerbosity > 1) printf("Composing IPF buffer...\n"); LINKLIST llSubarticles; lstInit(&llSubarticles, FALSE); // now dump out IPF into one huge buffer... // note, this loop will go thru all files again, // but since we have a second loop within which // marks files as written, this will effectively // only process the root items DumpArticlesWithParent(pxstrIPF, NULL, // root documents only &strExtras); // this will recurse /* * loop 5: * */ // add all the special links // to the bottom; these should be the only // unprocessed files, or something went wrong here pNode = lstQueryFirstNode(&G_llFiles2Process); BOOL fFirst = TRUE; XSTRING strEncode; xstrInit(&strEncode, 0); while (pNode) { PARTICLETREENODE pFile2Process = (PARTICLETREENODE)pNode->pItemData; // process only the special files now if (pFile2Process->ulHeaderLevel == -1) { if (fFirst) { xstrcat(pxstrIPF, ":h1 group=99 x=right width=30%.", 0); xstrcat(pxstrIPF, G_pcszResourcesOnInternetTitle, 0); xstrcatc(pxstrIPF, '\n'); xstrcat(pxstrIPF, G_pcszResourcesOnInternetBody, 0); xstrcatc(pxstrIPF, '\n'); fFirst = FALSE; } // encode the strings again because STUPID ipf // gets confused otherwise xstrcpy(&strEncode, (PSZ)pFile2Process->Tree.ulKey, // pszFilename, 0); ConvertEscapes(&strEncode); ULONG ulOfs = 0; while (xstrFindReplaceC(&strEncode, &ulOfs, ":", "&colon.")) ; xstrCatf(pxstrIPF, ":h2 res=%d group=98 x=right y=bottom width=60%% height=40%%.%s\n", pFile2Process->ulResID, strEncode.psz); xstrCatf(pxstrIPF, ":p.:lines align=center." "\n%s\n" ":p.:link reftype=launch object='netscape.exe' data='%s'.\n" "%s\n" ":elink.:elines.\n", G_pcszClickBelow, (PSZ)pFile2Process->Tree.ulKey, // pszFilename, strEncode.psz); } else if (!pFile2Process->fProcessed) Error(2, __FILE__, __LINE__, __FUNCTION__, "Strange, file \"%s\" wasn't processed.", (PSZ)pFile2Process->Tree.ulKey); // pszFilename); pNode = pNode->pNext; } if (G_ulVerbosity > 1) printf("\nDone processing files.\n"); else printf("\n"); } } return (arc); } /* ****************************************************************** * * main * ********************************************************************/ /* *@@ AddDefine: * */ BOOL AddDefine(const char *pcszDefine) // in: first char after #define { PCSZ p = pcszDefine; PSZ pszIdentifier = NULL, pszValue = NULL; /* printf("testing %c%c%c%c%c%c%c%c%c%c\n", *p, *(p+1), *(p+2), *(p+3), *(p+4), *(p+5), *(p+6), *(p+7), *(p+8), *(p+9)); */ while ((*p) && (!pszValue)) { switch (*p) { // skip comments case '/': case '\r': case '\n': // forget it // printf(" quitting\n"); return (FALSE); case ' ': case '\t': break; case '"': { if (pszIdentifier) { /* printf("testing %c%c%c%c%c%c%c%c%c%c\n", *p, *(p+1), *(p+2), *(p+3), *(p+4), *(p+5), *(p+6), *(p+7), *(p+8), *(p+9)); */ // looks like a string constant... PCSZ pEnd = p + 1; while (*pEnd && !pszValue) { switch (*pEnd) { case '\r': case '\n': // forget it return (FALSE); case '"': // allow for \" escape // V0.9.20 (2002-07-12) [umoeller] if (pEnd[-1] != '\\') // looks goood! pszValue = strhSubstr(p + 1, // after quote pEnd); // printf("-->found \"%s\"\n", pszValue); break; } if ((pEnd) && (*pEnd)) pEnd++; else break; } } else // quote before identifier: don't think so return (FALSE); } break; default: { // regular character: // go to the next space // this must be the identifier then /* printf("testing %c%c%c%c%c\n", *p, *(p+1), *(p+2), *(p+3), *(p+4)); */ PCSZ pEnd = p; // first non-space character while ( (*pEnd) && (*pEnd != ' ') && (*pEnd != '\t') && (*pEnd != '\n') ) pEnd++; if ( (*pEnd == ' ') || (*pEnd == '\t') || ((pszIdentifier) && (*pEnd == '\n')) ) { // got it if (!pszIdentifier) { pszIdentifier = strhSubstr(p, pEnd); // printf(" got identifier \"%s\"\n", pszIdentifier); } else pszValue = strhSubstr(p, pEnd); // go on after this p = pEnd; } else // bull return (FALSE); } break; } if ((p) && (*p)) p++; else return (FALSE); } if (pszValue) { if (!strcmp(pszIdentifier, "H2I_INTERNET_TITLE")) { G_pcszResourcesOnInternetTitle = pszValue; } else if (!strcmp(pszIdentifier, "H2I_INTERNET_BODY")) { G_pcszResourcesOnInternetBody = pszValue; } else if (!strcmp(pszIdentifier, "H2I_INTERNET_CLICK")) { G_pcszClickBelow = pszValue; } else { PDEFINENODE pMapping = NEW(DEFINENODE); pMapping->Tree.ulKey = (ULONG)pszIdentifier; // handle escapes V0.9.20 (2002-07-12) [umoeller] XSTRING strValue; xstrInitSet(&strValue, pszValue); ULONG ulOfs; ulOfs = 0; while (xstrFindReplaceC(&strValue, &ulOfs, // replace \" with " "\\\"", "\"")) ; ulOfs = 0; while (xstrFindReplaceC(&strValue, &ulOfs, // replace \r with carriage return "\\r", "\r")) ; ulOfs = 0; while (xstrFindReplaceC(&strValue, &ulOfs, // replace \n with newline "\\n", "\n")) ; ulOfs = 0; while (xstrFindReplaceC(&strValue, &ulOfs, // replace \\ with backslash "\\\\", "\\")) ; pszValue = strValue.psz; // do not free strValue pMapping->pszValue = pszValue; pMapping->ulValueLength = strlen(pszValue); if (AddDefinition(pMapping)) { if (G_ulVerbosity > 2) printf(" found #define \"%s\" \"%s\"\n", pszIdentifier, pszValue); G_cDefines++; return (TRUE); } } } return (FALSE); } /* *@@ ParseCHeader: * */ APIRET ParseCHeader(const char *pcszHeaderFile, PULONG pcDefines) { PSZ pszContents; APIRET arc; ULONG cDefines = 0; ULONG cbRead = 0; if (!(arc = doshLoadTextFile(pcszHeaderFile, &pszContents, &cbRead))) { XSTRING strSource; xstrInitSet2(&strSource, pszContents, cbRead - 1); xstrConvertLineFormat(&strSource, CRLF2LF); PSZ p = strSource.psz; while (TRUE) { switch (*p) { // skip comments case '/': switch (*(p+1)) { case '/': // C++ comment // go to the end of the line, below p = strhFindEOL(p + 1, NULL); break; case '*': // C comment p = strstr(p+2, "*/"); break; } break; case '#': if (!strncmp(p + 1, "define", 6)) if (AddDefine(p + 7)) cDefines++; // go to the end of the line, // there can't be anything else p = strhFindEOL(p + 1, NULL); break; case ' ': case '\t': // ignore p++; continue; break; } if (p && (*p)) p++; else break; } xstrClear(&strSource); } if (!arc) *pcDefines = cDefines; return (arc); } /* *@@ main: * program entry point. * */ int main(int argc, char* argv[]) { int rc = 0; /* * 1) initialization * */ xstrInit(&G_strError, 0); xstrInit(&G_strCrashContext, 0); xstrcpy(&G_strCrashContext, "Startup", 0); TRY_LOUD(excpt1) { CHAR szRootFile[CCHMAXPATH] = ""; BOOL fShowStatistics = FALSE; treeInit(&G_LinkIDsTreeRoot, NULL); treeInit(&G_DefinesTreeRoot, NULL); lstInit(&G_llFiles2Process, FALSE); LINKLIST llIncludes; lstInit(&llIncludes, TRUE); // will hold plain -i filenames from strdup /* * 2) parse command line * */ xstrcpy(&G_strCrashContext, "Parsing command line", 0); if (argc < 2) { Explain(NULL); rc = 999; } else { SHORT i = 0; while ( (i++ < argc - 1) && (!rc) ) { if (argv[i][0] == '-') { // option found: SHORT i2; for (i2 = 1; i2 < strlen(argv[i]) && (!rc); i2++) { CHAR cOption = argv[i][i2]; switch (cOption) { case 'h': case '?': Explain(NULL); rc = 1; break; case 's': fShowStatistics = TRUE; break; case 'v': i2++; switch (argv[i][i2]) { case '0': G_ulVerbosity = 0; break; case '1': G_ulVerbosity = 1; break; // default also case '3': G_ulVerbosity = 3; break; case '4': G_ulVerbosity = 4; break; default: G_ulVerbosity = 2; } break; case 'i': if (strlen(&argv[i][i2+1])) { lstAppendItem(&llIncludes, strdup(&argv[i][i2+1])); } else { Error(2, __FILE__, __LINE__, __FUNCTION__, "No filename specified with -i option."); rc = 99; } i2 = 999999999; break; case 'r': G_fNoMoveToRoot = TRUE; break; default: // unknown option Explain("Unknown option '%c'.", cOption); rc = 999; break; } } } // end if (argv[i][0] == '-') else { /* * collect file names: * */ if (!szRootFile[0]) strcpy(szRootFile, argv[i]); else { Explain("More than one root file specified."); rc = 999; } } // end elseif (argv[i][0] == '-') } } if (!szRootFile[0] && !rc) { Explain("You have not specified any input file."); rc = 999; } if (!rc) { if (G_ulVerbosity) PrintHeader(); /* * build handlers tree * */ treeInit(&G_HandlersTreeRoot, NULL); ULONG ul; for (ul = 0; ul < ARRAYITEMCOUNT(HandlersList); ul++) { PHANDLERTREENODE pNode = NEW(HANDLERTREENODE); pNode->Tree.ulKey = (ULONG)HandlersList[ul].pcszTag; pNode->pHandler = HandlersList[ul].pHandler; treeInsert(&G_HandlersTreeRoot, NULL, (TREE*)pNode, fnCompareStrings); } /* * parse includes * */ xstrcpy(&G_strCrashContext, "Parsing include files", 0); PLISTNODE pNode = lstQueryFirstNode(&llIncludes); while (pNode) { PSZ pszInclude = (PSZ)pNode->pItemData; ULONG cDefines = 0; if (!(rc = ParseCHeader(pszInclude, &cDefines))) { if (G_ulVerbosity) printf("Found %d valid #define's in \"%s\".\n", cDefines, pszInclude); } else Error(2, __FILE__, __LINE__, __FUNCTION__, "Error %d opening include file \"%s\".", rc, pszInclude); pNode = pNode->pNext; } /* * finally, convert HTML to IPF * */ // main buffer: XSTRING str; xstrInit(&str, 100*1000); xstrcpy(&str, ":userdoc.\n", 0); xstrcat(&str, ":docprof toc=12345.\n", // let heading levels 1-5 appear in TOC 0); xstrcat(&str, ".* Converted from HTML to IPF by " H2I_HEADER "\n", 0); xstrcpy(&G_strCrashContext, "Reading root article", 0); GetOrCreateArticle(szRootFile, 0, // current nesting level will be 1 then NULL); rc = ProcessFiles(&str); xstrcat(&str, ":euserdoc.", 0); xstrcpy(&G_strCrashContext, "Writing IPF", 0); if (!rc) { CHAR szOutputFile[CCHMAXPATH]; strcpy(szOutputFile, szRootFile); PSZ p = strrchr(szOutputFile, '.'); if (p) strcpy(p, ".ipf"); else strcat(szOutputFile, ".ipf"); ULONG cbWritten = 0; XSTRING str2; xstrInit(&str2, str.ulLength * 2 / 3); p = str.psz; CHAR c; while (c = *p++) { if (c == '\n') xstrcatc(&str2, '\r'); xstrcatc(&str2, c); } xstrcatc(&str2, '\0'); if (rc = doshWriteTextFile(szOutputFile, str2.psz, &cbWritten, NULL)) Error(2, __FILE__, __LINE__, __FUNCTION__, "Error %d writing output file \"%s\".", rc, szOutputFile); else if ((fShowStatistics) || (G_ulVerbosity > 1)) { printf("%d HTML files were processed\n", lstCountItems(&G_llFiles2Process)); printf("%d #defines were active\n", G_cDefines); printf("%d character entities were replaced\n", G_ulReplacements); printf("Output file \"%s\" successfully written, %d bytes\n", szOutputFile, cbWritten); } } } } CATCH(excpt1) { Error(2, __FILE__, __LINE__, __FUNCTION__, "Exception caught in main(), context: %s", G_strCrashContext.psz); rc = 1; } END_CATCH(); return (rc); }