home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
OS/2 Shareware BBS: 5 Edit
/
05-Edit.zip
/
ssrtf.ycc
< prev
next >
Wrap
Text File
|
1995-06-14
|
7KB
|
195 lines
//------------------------------------------------------------------
//
// Note : Set tabs to 6 to view this file.
//
// Here is our first crack at an RTF parser. It is very 'sparse' in
// that it just breaks the input up into tokens and doesn't look
// for many control words (you can add these easily, see below),
// but it should parse all RTF files.
//
// Like HTML, RTF is problematic, at least from a 'grammar' point
// of view. There is a very detailed RTF spec, but most RTF writers
// don't follow it exactly. These deviations can cause parsing errors.
// It's probably best to just break out the control words you are
// looking for, and don't worry too much about 'forming' the tokens
// with a grammar. All you're doing is transferring the work to
// the code. You would like to have the grammar handle it, but alas...
//
// There is a commented out section which handles the 'fonttbl'
// portion. We tested it here, and it seems to work. We did run
// into 1 problem while developing this. Word sometimes sticks
// '\n' characters in between some of the font descriptions, so
// we had to account for that, which is a nuisance. If the
// '\n' characters can show up randomly, another solution will
// be needed, our solution only accounts for them between
// individual font descriptions. This is typical of the seemingly
// small deviations that can cause problems in a grammar. There
// are ways to do this with %expression lists, but I'm not sure
// it's worth the effort. You're probably better off having the
// parser do things like recognizing specific tokens, and handling
// the logic in the code, see above.
//
// It is very easy to add control words to this rule file. Just
// add them after the 'CtrlWParm' regular expression. We use two
// different tokens for the control words, one with the optional
// space, and one with the numeric parameter. You can do the same
// thing when adding the control words.
//
// In the grammar, just add 'statement -> '\myNewCtrlWord';' for
// each control word. This way, you won't have to look for the
// control words in your code, they will each have their own
// token.
//
// If you run into problems, let us know by calling 800-988-9023, or
// send an e-mail to 'willd.accessnv.com' on the internet, or
// 71332,2577 on CompuServe.
//-------------------------------------------------------------------
// Macro section: named sub-patterns that expressions reference as {name}.
//
// {n}   - an optionally negative decimal integer. No expression visible in
//         this file references it; CtrlWParm spells its numeric part inline.
// {hex} - a single hexadecimal digit, lowercase only. SpecHex uses it twice.
//         NOTE(review): uppercase A-F is not matched -- confirm intended.
%macro
{n} '\-?[0-9]+';
{hex} '[a-f0-9]';
// Expression list 'Main': the lexer rules. Each entry has the form
//     'regular expression' TokenName, 'alias';
// The quoted alias is the name the productions use to refer to the token.
//
// Several expressions can match the same input (for example Ctrl,
// CtrlWParm and Rtf all match '\rtf1' or a prefix of it). The file header
// asks for specific control words to be added AFTER CtrlWParm, so
// presumably a later expression takes priority when matches tie --
// TODO confirm against the parser generator's documentation before
// reordering anything in this list.
%expression Main
// Plain text: a run of anything that is not a backslash, brace,
// semicolon, space or newline.
'[^\\{}; \n]+' Text, 'text';
// A run of spaces and/or newlines.
'[ \n]+' White, 'white';
// Group delimiters.
'\{' OCurly, '{';
'\}' CCurly, '}';
// The two characters backslash + asterisk.
'\\\*' Dest, '\*';
// Generic control word: backslash, lowercase letters, optional single space.
'\\[a-z]+[ ]?' Ctrl, '\ctrl';
// Generic control word followed by an optional '-' and optional digits.
// The trailing delimiter space, if any, is NOT consumed here.
'\\[a-z]+\-?[0-9]*' CtrlWParm, '\ctrlWParm';
// Specific control words that get their own token.
'\\rtf[0-9]+' Rtf, '\rtf';
'\\ansi[ ]?' Ansi, '\ansi';
'\\mac[ ]?' Mac, '\mac';
'\\pc[ ]?' Pc, '\pc';
'\\pca[ ]?' Pca, '\pca';
// Disabled token definitions used only by the commented-out font table
// productions. Uncomment both together.
//'\\fonttbl[ ]?' Fonttbl, '\fonttbl';
//'\\f[0-9]+' Fnum, '\f';
//'\\fnil[ ]?' Fnil, '\fnil';
//'\\froman[ ]?' Froman, '\froman';
//'\\fswiss[ ]?' Fswiss, '\fswiss';
//'\\fmodern[ ]?' Fmodern, '\fmodern';
//'\\fscript[ ]?' Fscript, '\fscript';
//'\\fdecor[ ]?' Fdecor, '\fdecor';
//'\\ftech[ ]?' Ftech, '\ftech';
//'\\fbidi[ ]?' Fbidi, '\fbidi';
//'\\fcharset[0-9]+' Fcharset, '\fcharset';
//'\\fprq[0-9]+' Fprq, '\fprq';
//'\\fontemb[ ]?' Fontemb, '\fontemb';
//'\\ftnil[ ]?' Ftnil, '\ftnil';
//'\\fttruetype[ ]?' Fttruetype, '\fttruetype';
//'\\falt[ ]?' Falt, '\falt';
//'\\cpg[0-9]+' Cpg, '\cpg';
// Control symbols: backslash followed by one character that is not a
// lowercase letter. The specific symbols below overlap with this catch-all.
'\\[^a-z]' Spec, '\spec';
// NOTE(review): this matches backslash, backquote, then two {hex} digits.
// RTF writes hexadecimal character escapes with an apostrophe (\'hh), so
// the backquote may be a transcription of the apostrophe -- confirm.
'\\`{hex}{hex}' SpecHex, '\`';
'\\\-' SpecHy, '\-';
'\\_' SpecNonbHy, '\_';
'\\\{' SpecOCurly, '\{';
'\\\|' SpecFmula, '\|';
'\\\}' SpecCCurly, '\}';
'\\~' SpecNonbSp, '\~';
// A literal semicolon.
';' Semi, ';';
// Production section; 'start' is the start symbol. Each entry has the form
//     ReduceName nonterminal -> right-hand side;
// Quoted names on the right-hand side are token aliases from the
// expression list; bare names are nonterminals defined in this section.
%production start
// A document is one top-level group, optionally preceded and followed by
// a junk token.
Start start -> junkOpt group junkOpt;
// NOTE(review): a group needs at least one statement, so an empty '{}' is
// not accepted by these productions -- confirm that is acceptable.
GroupNested group -> '{' statements '}';
StatementList statements -> statements statement;
StatementOne statements -> statement;
// One production per token kind so each gets its own reduce name.
StatementCtrl statement -> '\ctrl';
StatementCtrlWParm statement -> '\ctrlWParm';
StatementSpecAny statement -> '\spec';
StatementSpecHex statement -> '\`';
StatementSpecDest statement -> '\*';
StatementSpecHyphen statement -> '\-';
StatementSpecNonbHyph statement -> '\_';
StatementSpecOCurly statement -> '\{';
StatementSpecFormula statement -> '\|';
StatementSpecCCurly statement -> '\}';
StatementSpecNonbSpace statement -> '\~';
StatementSemi statement -> ';';
StatementText statement -> 'text';
StatementWhite statement -> 'white';
// Nested group. 'group' is the nonterminal defined by GroupNested, not a
// token: no expression declares a 'group' alias, so it must not be quoted.
StatementGroup statement -> group;
//StatementFontref statement -> '\f';
StatementRtf statement -> rtf;
StatementCharset statement -> charset;
//StatementFonttbl statement -> fonttbl;
//StatementFiletbl statement -> filetbl;
//StatementColortbl statement -> colortbl;
// Wrapper nonterminals for the control words that have dedicated tokens.
Rtf rtf -> '\rtf';
CharsetAnsi charset -> '\ansi';
CharsetMac charset -> '\mac';
CharsetPc charset -> '\pc';
CharsetPca charset -> '\pca';
// Disabled productions for the font table ('fonttbl'). They rely on tokens
// ('\fonttbl', '\f', '\fnil', ...) whose definitions are also commented out
// in the expression list, and on the commented-out StatementFonttbl
// production; all of these have to be enabled together.
// whiteOpt in FontList is the allowance, described in the file header, for
// the '\n' characters Word sometimes puts between font descriptions. It
// only covers white space BETWEEN individual fonts.
//Fonttbl fonttbl -> '{' '\fonttbl' fonts '}';
//FontList fonts -> fonts whiteOpt font;
//FontListOne fonts -> font;
//FontBracketed font -> '{' fontinfo '}';
//FontPlain font -> fontinfo;
//FontinfoStatement fontinfo -> fnum ffamily fcharsetOpt fprqOpt
// fembOpt fcodepageOpt fdatalist
// faltnameOpt ';';
//FontNum fnum -> '\f';
//FontFamilyNil ffamily -> '\fnil';
//FontFamilyRoman ffamily -> '\froman';
//FontFamilySwiss ffamily -> '\fswiss';
//FontFamilyModern ffamily -> '\fmodern';
//FontFamilyScript ffamily -> '\fscript';
//FontFamilyDecor ffamily -> '\fdecor';
//FontFamilyTech ffamily -> '\ftech';
//FontFamilyBidi ffamily -> '\fbidi';
//FontCharsetNull fcharsetOpt -> ;
//FontCharset fcharsetOpt -> '\fcharset';
//FontPrqNull fprqOpt -> ;
//FontPrq fprqOpt -> '\fprq';
//FontEmbNull fembOpt -> ;
//FontEmb fembOpt -> '{' '\*' '\fontemb' fonttype
// fontdataOpt '}';
//FontTypeNil fonttype -> '\ftnil';
//FontTypeTrue fonttype -> '\fttruetype';
//FontDataNull fontdataOpt -> ;
//FontDataList fontdataOpt -> fdatalist;
//FDataList fdatalist -> fdatalist fdata;
//FDataListOne fdatalist -> fdata;
//FDataText fdata -> 'text';
//FDataWhite fdata -> 'white';
//FontAltNameOpt faltnameOpt -> ;
//FontAltName faltnameOpt -> '{' '\*' '\falt' 'text' '}';
//FontCodePageOptNull fcodepageOpt -> ;
//FontCodepageOpt fcodepageOpt -> '\cpg';
//WhiteOptNull whiteOpt -> ;
//WhiteOpt whiteOpt -> 'white';
// Optional "junk" that the Start production allows before and after the
// top-level group. junkOpt is either empty or exactly ONE junkStatement,
// so at most one stray token is accepted on each side.
Junk junkOpt -> junkStatement;
JunkNull junkOpt -> ;
// A junk statement is any single generic token. Compared with 'statement',
// there is no alternative here for ';', for a nested group, or for the
// dedicated rtf and charset control words.
JunkStatementCtrl junkStatement -> '\ctrl';
JunkStatementCtrlWParm junkStatement -> '\ctrlWParm';
JunkStatementSpecAny junkStatement -> '\spec';
JunkStatementSpecHex junkStatement -> '\`';
JunkStatementSpecDest junkStatement -> '\*';
JunkStatementSpecHyphen junkStatement -> '\-';
JunkStatementSpecNonbHy junkStatement -> '\_';
JunkStatementSpecOCurly junkStatement -> '\{';
JunkStatementSpecFmula junkStatement -> '\|';
JunkStatementSpecCCurly junkStatement -> '\}';
JunkStatementSpecNonbSp junkStatement -> '\~';
JunkStatementWhite junkStatement -> 'white';
JunkStatementText junkStatement -> 'text';