home *** CD-ROM | disk | FTP | other *** search
/ OS/2 Shareware BBS: 10 Tools / 10-Tools.zip / wxos2233.zip / wxOS2-2_3_3.zip / wxWindows-2.3.3 / src / html / htmlpars.cpp < prev    next >
C/C++ Source or Header  |  2002-05-08  |  24KB  |  822 lines

  1. /////////////////////////////////////////////////////////////////////////////
  2. // Name:        htmlpars.cpp
  3. // Purpose:     wxHtmlParser class (generic parser)
  4. // Author:      Vaclav Slavik
  5. // RCS-ID:      $Id: htmlpars.cpp,v 1.28 2002/05/05 18:26:24 VS Exp $
  6. // Copyright:   (c) 1999 Vaclav Slavik
  7. // Licence:     wxWindows Licence
  8. /////////////////////////////////////////////////////////////////////////////
  9.  
  10.  
  11. #ifdef __GNUG__
  12. #pragma implementation "htmlpars.h"
  13. #endif
  14.  
  15. #include "wx/wxprec.h"
  16.  
  17. #include "wx/defs.h"
  18. #if wxUSE_HTML && wxUSE_STREAMS
  19.  
  20. #ifdef __BORDLANDC__
  21. #pragma hdrstop
  22. #endif
  23.  
  24. #ifndef WXPRECOMP
  25.     #include "wx/log.h"
  26.     #include "wx/intl.h"
  27. #endif
  28.  
  29. #include "wx/tokenzr.h"
  30. #include "wx/wfstream.h"
  31. #include "wx/url.h"
  32. #include "wx/fontmap.h"
  33. #include "wx/html/htmldefs.h"
  34. #include "wx/html/htmlpars.h"
  35. #include "wx/dynarray.h"
  36. #include "wx/arrimpl.cpp"
  37.  
  38. //-----------------------------------------------------------------------------
  39. // wxHtmlParser helpers
  40. //-----------------------------------------------------------------------------
  41.  
  42. class wxHtmlTextPiece
  43. {
  44. public:
  45.     wxHtmlTextPiece(int pos, int lng) : m_pos(pos), m_lng(lng) {}
  46.     int m_pos, m_lng;
  47. };
  48.  
  49. WX_DECLARE_OBJARRAY(wxHtmlTextPiece, wxHtmlTextPieces);
  50. WX_DEFINE_OBJARRAY(wxHtmlTextPieces);
  51.  
  52. class wxHtmlParserState
  53. {
  54. public:
  55.     wxHtmlTag         *m_curTag;
  56.     wxHtmlTag         *m_tags;
  57.     wxHtmlTextPieces  *m_textPieces;
  58.     int                m_curTextPiece;
  59.     wxString           m_source;
  60.     wxHtmlParserState *m_nextState;
  61. };
  62.  
  63. //-----------------------------------------------------------------------------
  64. // wxHtmlParser
  65. //-----------------------------------------------------------------------------
  66.  
  67. IMPLEMENT_ABSTRACT_CLASS(wxHtmlParser,wxObject)
  68.  
  69. wxHtmlParser::wxHtmlParser()
  70.     : wxObject(), m_HandlersHash(wxKEY_STRING),
  71.       m_FS(NULL), m_HandlersStack(NULL)
  72. {
  73.     m_entitiesParser = new wxHtmlEntitiesParser;
  74.     m_Tags = NULL;
  75.     m_CurTag = NULL;
  76.     m_TextPieces = NULL;
  77.     m_CurTextPiece = 0;
  78.     m_SavedStates = NULL;
  79. }
  80.  
  81. wxHtmlParser::~wxHtmlParser()
  82. {
  83.     while (RestoreState()) {}
  84.     DestroyDOMTree();
  85.     
  86.     delete m_HandlersStack;
  87.     m_HandlersHash.Clear();
  88.     m_HandlersList.DeleteContents(TRUE);
  89.     m_HandlersList.Clear();
  90.     delete m_entitiesParser;
  91. }
  92.  
  93. wxObject* wxHtmlParser::Parse(const wxString& source)
  94. {
  95.     wxObject *result;
  96.  
  97.     InitParser(source);
  98.     DoParsing();
  99.     result = GetProduct();
  100.     DoneParser();
  101.     return result;
  102. }
  103.  
  104. void wxHtmlParser::InitParser(const wxString& source)
  105. {
  106.     SetSource(source);
  107. }
  108.  
  109. void wxHtmlParser::DoneParser()
  110. {
  111.     DestroyDOMTree();
  112. }
  113.  
  114. void wxHtmlParser::SetSource(const wxString& src)
  115. {
  116.     DestroyDOMTree();
  117.     m_Source = src;
  118.     CreateDOMTree();
  119.     m_CurTag = NULL;
  120.     m_CurTextPiece = 0;
  121. }
  122.  
  123. void wxHtmlParser::CreateDOMTree()
  124. {
  125.     wxHtmlTagsCache cache(m_Source);
  126.     m_TextPieces = new wxHtmlTextPieces;
  127.     CreateDOMSubTree(NULL, 0, m_Source.Length(), &cache);
  128.     m_CurTextPiece = 0;
  129. }
  130.  
  131. void wxHtmlParser::CreateDOMSubTree(wxHtmlTag *cur,
  132.                                     int begin_pos, int end_pos,
  133.                                     wxHtmlTagsCache *cache)
  134. {
  135.     if (end_pos <= begin_pos) return;
  136.  
  137.     wxChar c;
  138.     int i = begin_pos;
  139.     int textBeginning = begin_pos;
  140.  
  141.     while (i < end_pos)
  142.     {
  143.         c = m_Source.GetChar(i);
  144.  
  145.         if (c == wxT('<'))
  146.         {
  147.             // add text to m_TextPieces:
  148.             if (i - textBeginning > 0)
  149.                 m_TextPieces->Add(
  150.                     wxHtmlTextPiece(textBeginning, i - textBeginning));
  151.  
  152.             // if it is a comment, skip it:
  153.             if (i < end_pos-6 && m_Source.GetChar(i+1) == wxT('!') &&
  154.                                  m_Source.GetChar(i+2) == wxT('-') &&
  155.                                  m_Source.GetChar(i+3) == wxT('-'))
  156.             {
  157.                 // Comments begin with "<!--" and end with "--[ \t\r\n]*>"
  158.                 // according to HTML 4.0
  159.                 int dashes = 0;
  160.                 i += 4;
  161.                 while (i < end_pos)
  162.                 {
  163.                     c = m_Source.GetChar(i++);
  164.                     if ((c == wxT(' ') || c == wxT('\n') ||
  165.                         c == wxT('\r') || c == wxT('\t')) && dashes >= 2) {}
  166.                     else if (c == wxT('>') && dashes >= 2)
  167.                     {
  168.                         textBeginning = i;
  169.                         break;
  170.                     }
  171.                     else if (c == wxT('-'))
  172.                         dashes++;
  173.                     else
  174.                         dashes = 0;
  175.                 }
  176.             }
  177.  
  178.             // add another tag to the tree:
  179.             else if (i < end_pos-1 && m_Source.GetChar(i+1) != wxT('/'))
  180.             {
  181.                 wxHtmlTag *chd;
  182.                 if (cur)
  183.                     chd = new wxHtmlTag(cur, m_Source,
  184.                                         i, end_pos, cache, m_entitiesParser);
  185.                 else
  186.                 {
  187.                     chd = new wxHtmlTag(NULL, m_Source,
  188.                                         i, end_pos, cache, m_entitiesParser);
  189.                     if (!m_Tags)
  190.                     {
  191.                         // if this is the first tag to be created make the root
  192.                         // m_Tags point to it:
  193.                         m_Tags = chd;
  194.                     }
  195.                     else
  196.                     {
  197.                         // if there is already a root tag add this tag as
  198.                         // the last sibling:
  199.                         chd->m_Prev = m_Tags->GetLastSibling();
  200.                         chd->m_Prev->m_Next = chd;
  201.                     }
  202.                 }
  203.  
  204.                 if (chd->HasEnding())
  205.                 {
  206.                     CreateDOMSubTree(chd,
  207.                                      chd->GetBeginPos(), chd->GetEndPos1(),
  208.                                      cache);
  209.                     i = chd->GetEndPos2();
  210.                 }
  211.                 else
  212.                     i = chd->GetBeginPos();
  213.                 textBeginning = i;
  214.             }
  215.  
  216.             // ... or skip ending tag:
  217.             else
  218.             {
  219.                 while (i < end_pos && m_Source.GetChar(i) != wxT('>')) i++;
  220.                 textBeginning = i+1;
  221.             }
  222.         }
  223.         else i++;
  224.     }
  225.  
  226.     // add remaining text to m_TextPieces:
  227.     if (end_pos - textBeginning > 0)
  228.         m_TextPieces->Add(
  229.             wxHtmlTextPiece(textBeginning, end_pos - textBeginning));
  230. }
  231.  
  232. void wxHtmlParser::DestroyDOMTree()
  233. {
  234.     wxHtmlTag *t1, *t2;
  235.     t1 = m_Tags;
  236.     while (t1)
  237.     {
  238.         t2 = t1->GetNextSibling();
  239.         delete t1;
  240.         t1 = t2;
  241.     }
  242.     m_Tags = m_CurTag = NULL;
  243.  
  244.     delete m_TextPieces;
  245.     m_TextPieces = NULL;
  246. }
  247.  
  248. void wxHtmlParser::DoParsing()
  249. {
  250.     m_CurTag = m_Tags;
  251.     m_CurTextPiece = 0;
  252.     DoParsing(0, m_Source.Length());
  253. }
  254.  
  255. void wxHtmlParser::DoParsing(int begin_pos, int end_pos)
  256. {
  257.     if (end_pos <= begin_pos) return;
  258.  
  259.     wxHtmlTextPieces& pieces = *m_TextPieces;
  260.     size_t piecesCnt = pieces.GetCount();
  261.  
  262.     while (begin_pos < end_pos)
  263.     {
  264.         while (m_CurTag && m_CurTag->GetBeginPos() < begin_pos)
  265.             m_CurTag = m_CurTag->GetNextTag();
  266.         while (m_CurTextPiece < piecesCnt &&
  267.                pieces[m_CurTextPiece].m_pos < begin_pos)
  268.             m_CurTextPiece++;
  269.  
  270.         if (m_CurTextPiece < piecesCnt &&
  271.             (!m_CurTag ||
  272.              pieces[m_CurTextPiece].m_pos < m_CurTag->GetBeginPos()))
  273.         {
  274.             // Add text:
  275.             AddText(GetEntitiesParser()->Parse(
  276.                        m_Source.Mid(pieces[m_CurTextPiece].m_pos,
  277.                                     pieces[m_CurTextPiece].m_lng)));
  278.             begin_pos = pieces[m_CurTextPiece].m_pos +
  279.                         pieces[m_CurTextPiece].m_lng;
  280.             m_CurTextPiece++;
  281.         }
  282.         else if (m_CurTag)
  283.         {
  284.             // Add tag:
  285.             if (m_CurTag)
  286.             {
  287.                 if (m_CurTag->HasEnding())
  288.                     begin_pos = m_CurTag->GetEndPos2();
  289.                 else
  290.                     begin_pos = m_CurTag->GetBeginPos();
  291.             }
  292.             wxHtmlTag *t = m_CurTag;
  293.             m_CurTag = m_CurTag->GetNextTag();
  294.             AddTag(*t);
  295.         }
  296.         else break;
  297.     }
  298. }
  299.  
  300. void wxHtmlParser::AddTag(const wxHtmlTag& tag)
  301. {
  302.     wxHtmlTagHandler *h;
  303.     bool inner = FALSE;
  304.  
  305.     h = (wxHtmlTagHandler*) m_HandlersHash.Get(tag.GetName());
  306.     if (h)
  307.         inner = h->HandleTag(tag);
  308.     if (!inner)
  309.     {
  310.         if (tag.HasEnding())
  311.             DoParsing(tag.GetBeginPos(), tag.GetEndPos1());
  312.     }
  313. }
  314.  
  315. void wxHtmlParser::AddTagHandler(wxHtmlTagHandler *handler)
  316. {
  317.     wxString s(handler->GetSupportedTags());
  318.     wxStringTokenizer tokenizer(s, wxT(", "));
  319.  
  320.     while (tokenizer.HasMoreTokens())
  321.         m_HandlersHash.Put(tokenizer.GetNextToken(), handler);
  322.  
  323.     if (m_HandlersList.IndexOf(handler) == wxNOT_FOUND)
  324.         m_HandlersList.Append(handler);
  325.  
  326.     handler->SetParser(this);
  327. }
  328.  
  329. void wxHtmlParser::PushTagHandler(wxHtmlTagHandler *handler, wxString tags)
  330. {
  331.     wxStringTokenizer tokenizer(tags, wxT(", "));
  332.     wxString key;
  333.  
  334.     if (m_HandlersStack == NULL)
  335.     {
  336.         m_HandlersStack = new wxList;
  337.         m_HandlersStack->DeleteContents(TRUE);
  338.     }
  339.  
  340.     m_HandlersStack->Insert(new wxHashTable(m_HandlersHash));
  341.  
  342.     while (tokenizer.HasMoreTokens())
  343.     {
  344.         key = tokenizer.GetNextToken();
  345.         m_HandlersHash.Delete(key);
  346.         m_HandlersHash.Put(key, handler);
  347.     }
  348. }
  349.  
  350. void wxHtmlParser::PopTagHandler()
  351. {
  352.     wxNode *first;
  353.  
  354.     if (m_HandlersStack == NULL ||
  355.         (first = m_HandlersStack->GetFirst()) == NULL)
  356.     {
  357.         wxLogWarning(_("Warning: attempt to remove HTML tag handler from empty stack."));
  358.         return;
  359.     }
  360.     m_HandlersHash = *((wxHashTable*) first->GetData());
  361.     m_HandlersStack->DeleteNode(first);
  362. }
  363.  
  364. void wxHtmlParser::SetSourceAndSaveState(const wxString& src)
  365. {
  366.     wxHtmlParserState *s = new wxHtmlParserState;
  367.  
  368.     s->m_curTag = m_CurTag;
  369.     s->m_tags = m_Tags;
  370.     s->m_textPieces = m_TextPieces;
  371.     s->m_curTextPiece = m_CurTextPiece;
  372.     s->m_source = m_Source;
  373.  
  374.     s->m_nextState = m_SavedStates;
  375.     m_SavedStates = s;
  376.  
  377.     m_CurTag = NULL;
  378.     m_Tags = NULL;
  379.     m_TextPieces = NULL;
  380.     m_CurTextPiece = 0;
  381.     m_Source = wxEmptyString;
  382.  
  383.     SetSource(src);
  384. }
  385.  
  386. bool wxHtmlParser::RestoreState()
  387. {
  388.     if (!m_SavedStates) return FALSE;
  389.  
  390.     DestroyDOMTree();
  391.  
  392.     wxHtmlParserState *s = m_SavedStates;
  393.     m_SavedStates = s->m_nextState;
  394.  
  395.     m_CurTag = s->m_curTag;
  396.     m_Tags = s->m_tags;
  397.     m_TextPieces = s->m_textPieces;
  398.     m_CurTextPiece = s->m_curTextPiece;
  399.     m_Source = s->m_source;
  400.  
  401.     delete s;
  402.     return TRUE;
  403. }
  404.  
  405. //-----------------------------------------------------------------------------
  406. // wxHtmlTagHandler
  407. //-----------------------------------------------------------------------------
  408.  
  409. IMPLEMENT_ABSTRACT_CLASS(wxHtmlTagHandler,wxObject)
  410.  
  411.  
  412. //-----------------------------------------------------------------------------
  413. // wxHtmlEntitiesParser
  414. //-----------------------------------------------------------------------------
  415.  
  416. IMPLEMENT_DYNAMIC_CLASS(wxHtmlEntitiesParser,wxObject)
  417.  
  418. wxHtmlEntitiesParser::wxHtmlEntitiesParser()
  419. #if wxUSE_WCHAR_T && !wxUSE_UNICODE
  420.     : m_conv(NULL), m_encoding(wxFONTENCODING_SYSTEM)
  421. #endif
  422. {
  423. }
  424.  
  425. wxHtmlEntitiesParser::~wxHtmlEntitiesParser()
  426. {
  427. #if wxUSE_WCHAR_T && !wxUSE_UNICODE
  428.     delete m_conv;
  429. #endif
  430. }
  431.  
  432. void wxHtmlEntitiesParser::SetEncoding(wxFontEncoding encoding)
  433. {
  434. #if wxUSE_WCHAR_T && !wxUSE_UNICODE
  435.     if (encoding == m_encoding) return;
  436.     delete m_conv;
  437.     m_conv = NULL;
  438.     m_encoding = encoding;
  439.     if (m_encoding != wxFONTENCODING_SYSTEM)
  440.         m_conv = new wxCSConv(wxFontMapper::GetEncodingName(m_encoding));
  441. #else
  442.     (void) encoding;
  443. #endif
  444. }
  445.  
  446. wxString wxHtmlEntitiesParser::Parse(const wxString& input)
  447. {
  448.     const wxChar *c, *last;
  449.     const wxChar *in_str = input.c_str();
  450.     wxString output;
  451.     
  452.     output.reserve(input.length());
  453.  
  454.     for (c = in_str, last = in_str; *c != wxT('\0'); c++)
  455.     {
  456.         if (*c == wxT('&'))
  457.         {
  458.             if (c - last > 0)
  459.                 output.append(last, c - last);
  460.             if (++c == wxT('\0')) break;
  461.         
  462.             wxString entity;
  463.             const wxChar *ent_s = c;
  464.             wxChar entity_char;
  465.         
  466.             for (; (*c >= wxT('a') && *c <= wxT('z')) ||
  467.                    (*c >= wxT('A') && *c <= wxT('Z')) ||
  468.                    (*c >= wxT('0') && *c <= wxT('9')) ||
  469.                    *c == wxT('_') || *c == wxT('#'); c++) {}
  470.             entity.append(ent_s, c - ent_s);
  471.             if (*c != wxT(';')) c--;
  472.             last = c+1;
  473.             entity_char = GetEntityChar(entity);
  474.             if (entity_char)
  475.                 output << entity_char;
  476.             else
  477.             {
  478.                 output.append(ent_s-1, c-ent_s+2);
  479.                 wxLogDebug(wxT("Unrecognized HTML entity: '%s'"), entity.c_str());
  480.             }
  481.         }
  482.     }
  483.     if (*last != wxT('\0'))
  484.         output.append(last);
  485.     return output;
  486. }
  487.  
  488. struct wxHtmlEntityInfo
  489. {
  490.     const wxChar *name;
  491.     unsigned code;
  492. };
  493.  
  494. extern "C" int LINKAGEMODE wxHtmlEntityCompare(const void *key, const void *item)
  495. {
  496.     return wxStrcmp((wxChar*)key, ((wxHtmlEntityInfo*)item)->name);
  497. }
  498.  
  499. wxChar wxHtmlEntitiesParser::GetCharForCode(unsigned code)
  500. {
  501. #if wxUSE_UNICODE
  502.     return (wxChar)code;
  503. #elif wxUSE_WCHAR_T
  504.     char buf[2];
  505.     wchar_t wbuf[2];
  506.     wbuf[0] = (wchar_t)code;
  507.     wbuf[1] = 0;
  508.     wxMBConv *conv = m_conv ? m_conv : &wxConvLocal;
  509.     if (conv->WC2MB(buf, wbuf, 2) == (size_t)-1)
  510.         return '?';
  511.     return buf[0];
  512. #else
  513.     return (code < 256) ? (wxChar)code : '?';
  514. #endif
  515. }
  516.  
  517. wxChar wxHtmlEntitiesParser::GetEntityChar(const wxString& entity)
  518. {
  519.     unsigned code = 0;
  520.  
  521.     if (entity[0] == wxT('#'))
  522.     {
  523.         const wxChar *ent_s = entity.c_str();
  524.         const wxChar *format;
  525.  
  526.         if (ent_s[1] == wxT('x') || ent_s[1] == wxT('X'))
  527.         {
  528.             format = wxT("%x");
  529.             ent_s++;
  530.         }
  531.         else
  532.             format = wxT("%u");
  533.         ent_s++;
  534.  
  535.         if (wxSscanf(ent_s, format, &code) != 1)
  536.             code = 0;
  537.     }
  538.     else
  539.     {
  540.         static wxHtmlEntityInfo substitutions[] = {
  541.             { wxT("AElig"),198 },
  542.             { wxT("Aacute"),193 },
  543.             { wxT("Acirc"),194 },
  544.             { wxT("Agrave"),192 },
  545.             { wxT("Alpha"),913 },
  546.             { wxT("Aring"),197 },
  547.             { wxT("Atilde"),195 },
  548.             { wxT("Auml"),196 },
  549.             { wxT("Beta"),914 },
  550.             { wxT("Ccedil"),199 },
  551.             { wxT("Chi"),935 },
  552.             { wxT("Dagger"),8225 },
  553.             { wxT("Delta"),916 },
  554.             { wxT("ETH"),208 },
  555.             { wxT("Eacute"),201 },
  556.             { wxT("Ecirc"),202 },
  557.             { wxT("Egrave"),200 },
  558.             { wxT("Epsilon"),917 },
  559.             { wxT("Eta"),919 },
  560.             { wxT("Euml"),203 },
  561.             { wxT("Gamma"),915 },
  562.             { wxT("Iacute"),205 },
  563.             { wxT("Icirc"),206 },
  564.             { wxT("Igrave"),204 },
  565.             { wxT("Iota"),921 },
  566.             { wxT("Iuml"),207 },
  567.             { wxT("Kappa"),922 },
  568.             { wxT("Lambda"),923 },
  569.             { wxT("Mu"),924 },
  570.             { wxT("Ntilde"),209 },
  571.             { wxT("Nu"),925 },
  572.             { wxT("OElig"),338 },
  573.             { wxT("Oacute"),211 },
  574.             { wxT("Ocirc"),212 },
  575.             { wxT("Ograve"),210 },
  576.             { wxT("Omega"),937 },
  577.             { wxT("Omicron"),927 },
  578.             { wxT("Oslash"),216 },
  579.             { wxT("Otilde"),213 },
  580.             { wxT("Ouml"),214 },
  581.             { wxT("Phi"),934 },
  582.             { wxT("Pi"),928 },
  583.             { wxT("Prime"),8243 },
  584.             { wxT("Psi"),936 },
  585.             { wxT("Rho"),929 },
  586.             { wxT("Scaron"),352 },
  587.             { wxT("Sigma"),931 },
  588.             { wxT("THORN"),222 },
  589.             { wxT("Tau"),932 },
  590.             { wxT("Theta"),920 },
  591.             { wxT("Uacute"),218 },
  592.             { wxT("Ucirc"),219 },
  593.             { wxT("Ugrave"),217 },
  594.             { wxT("Upsilon"),933 },
  595.             { wxT("Uuml"),220 },
  596.             { wxT("Xi"),926 },
  597.             { wxT("Yacute"),221 },
  598.             { wxT("Yuml"),376 },
  599.             { wxT("Zeta"),918 },
  600.             { wxT("aacute"),225 },
  601.             { wxT("acirc"),226 },
  602.             { wxT("acute"),180 },
  603.             { wxT("aelig"),230 },
  604.             { wxT("agrave"),224 },
  605.             { wxT("alefsym"),8501 },
  606.             { wxT("alpha"),945 },
  607.             { wxT("amp"),38 },
  608.             { wxT("and"),8743 },
  609.             { wxT("ang"),8736 },
  610.             { wxT("aring"),229 },
  611.             { wxT("asymp"),8776 },
  612.             { wxT("atilde"),227 },
  613.             { wxT("auml"),228 },
  614.             { wxT("bdquo"),8222 },
  615.             { wxT("beta"),946 },
  616.             { wxT("brvbar"),166 },
  617.             { wxT("bull"),8226 },
  618.             { wxT("cap"),8745 },
  619.             { wxT("ccedil"),231 },
  620.             { wxT("cedil"),184 },
  621.             { wxT("cent"),162 },
  622.             { wxT("chi"),967 },
  623.             { wxT("circ"),710 },
  624.             { wxT("clubs"),9827 },
  625.             { wxT("cong"),8773 },
  626.             { wxT("copy"),169 },
  627.             { wxT("crarr"),8629 },
  628.             { wxT("cup"),8746 },
  629.             { wxT("curren"),164 },
  630.             { wxT("dArr"),8659 },
  631.             { wxT("dagger"),8224 },
  632.             { wxT("darr"),8595 },
  633.             { wxT("deg"),176 },
  634.             { wxT("delta"),948 },
  635.             { wxT("diams"),9830 },
  636.             { wxT("divide"),247 },
  637.             { wxT("eacute"),233 },
  638.             { wxT("ecirc"),234 },
  639.             { wxT("egrave"),232 },
  640.             { wxT("empty"),8709 },
  641.             { wxT("emsp"),8195 },
  642.             { wxT("ensp"),8194 },
  643.             { wxT("epsilon"),949 },
  644.             { wxT("equiv"),8801 },
  645.             { wxT("eta"),951 },
  646.             { wxT("eth"),240 },
  647.             { wxT("euml"),235 },
  648.             { wxT("euro"),8364 },
  649.             { wxT("exist"),8707 },
  650.             { wxT("fnof"),402 },
  651.             { wxT("forall"),8704 },
  652.             { wxT("frac12"),189 },
  653.             { wxT("frac14"),188 },
  654.             { wxT("frac34"),190 },
  655.             { wxT("frasl"),8260 },
  656.             { wxT("gamma"),947 },
  657.             { wxT("ge"),8805 },
  658.             { wxT("gt"),62 },
  659.             { wxT("hArr"),8660 },
  660.             { wxT("harr"),8596 },
  661.             { wxT("hearts"),9829 },
  662.             { wxT("hellip"),8230 },
  663.             { wxT("iacute"),237 },
  664.             { wxT("icirc"),238 },
  665.             { wxT("iexcl"),161 },
  666.             { wxT("igrave"),236 },
  667.             { wxT("image"),8465 },
  668.             { wxT("infin"),8734 },
  669.             { wxT("int"),8747 },
  670.             { wxT("iota"),953 },
  671.             { wxT("iquest"),191 },
  672.             { wxT("isin"),8712 },
  673.             { wxT("iuml"),239 },
  674.             { wxT("kappa"),954 },
  675.             { wxT("lArr"),8656 },
  676.             { wxT("lambda"),955 },
  677.             { wxT("lang"),9001 },
  678.             { wxT("laquo"),171 },
  679.             { wxT("larr"),8592 },
  680.             { wxT("lceil"),8968 },
  681.             { wxT("ldquo"),8220 },
  682.             { wxT("le"),8804 },
  683.             { wxT("lfloor"),8970 },
  684.             { wxT("lowast"),8727 },
  685.             { wxT("loz"),9674 },
  686.             { wxT("lrm"),8206 },
  687.             { wxT("lsaquo"),8249 },
  688.             { wxT("lsquo"),8216 },
  689.             { wxT("lt"),60 },
  690.             { wxT("macr"),175 },
  691.             { wxT("mdash"),8212 },
  692.             { wxT("micro"),181 },
  693.             { wxT("middot"),183 },
  694.             { wxT("minus"),8722 },
  695.             { wxT("mu"),956 },
  696.             { wxT("nabla"),8711 },
  697.             { wxT("nbsp"),160 },
  698.             { wxT("ndash"),8211 },
  699.             { wxT("ne"),8800 },
  700.             { wxT("ni"),8715 },
  701.             { wxT("not"),172 },
  702.             { wxT("notin"),8713 },
  703.             { wxT("nsub"),8836 },
  704.             { wxT("ntilde"),241 },
  705.             { wxT("nu"),957 },
  706.             { wxT("oacute"),243 },
  707.             { wxT("ocirc"),244 },
  708.             { wxT("oelig"),339 },
  709.             { wxT("ograve"),242 },
  710.             { wxT("oline"),8254 },
  711.             { wxT("omega"),969 },
  712.             { wxT("omicron"),959 },
  713.             { wxT("oplus"),8853 },
  714.             { wxT("or"),8744 },
  715.             { wxT("ordf"),170 },
  716.             { wxT("ordm"),186 },
  717.             { wxT("oslash"),248 },
  718.             { wxT("otilde"),245 },
  719.             { wxT("otimes"),8855 },
  720.             { wxT("ouml"),246 },
  721.             { wxT("para"),182 },
  722.             { wxT("part"),8706 },
  723.             { wxT("permil"),8240 },
  724.             { wxT("perp"),8869 },
  725.             { wxT("phi"),966 },
  726.             { wxT("pi"),960 },
  727.             { wxT("piv"),982 },
  728.             { wxT("plusmn"),177 },
  729.             { wxT("pound"),163 },
  730.             { wxT("prime"),8242 },
  731.             { wxT("prod"),8719 },
  732.             { wxT("prop"),8733 },
  733.             { wxT("psi"),968 },
  734.             { wxT("quot"),34 },
  735.             { wxT("rArr"),8658 },
  736.             { wxT("radic"),8730 },
  737.             { wxT("rang"),9002 },
  738.             { wxT("raquo"),187 },
  739.             { wxT("rarr"),8594 },
  740.             { wxT("rceil"),8969 },
  741.             { wxT("rdquo"),8221 },
  742.             { wxT("real"),8476 },
  743.             { wxT("reg"),174 },
  744.             { wxT("rfloor"),8971 },
  745.             { wxT("rho"),961 },
  746.             { wxT("rlm"),8207 },
  747.             { wxT("rsaquo"),8250 },
  748.             { wxT("rsquo"),8217 },
  749.             { wxT("sbquo"),8218 },
  750.             { wxT("scaron"),353 },
  751.             { wxT("sdot"),8901 },
  752.             { wxT("sect"),167 },
  753.             { wxT("shy"),173 },
  754.             { wxT("sigma"),963 },
  755.             { wxT("sigmaf"),962 },
  756.             { wxT("sim"),8764 },
  757.             { wxT("spades"),9824 },
  758.             { wxT("sub"),8834 },
  759.             { wxT("sube"),8838 },
  760.             { wxT("sum"),8721 },
  761.             { wxT("sup"),8835 },
  762.             { wxT("sup1"),185 },
  763.             { wxT("sup2"),178 },
  764.             { wxT("sup3"),179 },
  765.             { wxT("supe"),8839 },
  766.             { wxT("szlig"),223 },
  767.             { wxT("tau"),964 },
  768.             { wxT("there4"),8756 },
  769.             { wxT("theta"),952 },
  770.             { wxT("thetasym"),977 },
  771.             { wxT("thinsp"),8201 },
  772.             { wxT("thorn"),254 },
  773.             { wxT("tilde"),732 },
  774.             { wxT("times"),215 },
  775.             { wxT("trade"),8482 },
  776.             { wxT("uArr"),8657 },
  777.             { wxT("uacute"),250 },
  778.             { wxT("uarr"),8593 },
  779.             { wxT("ucirc"),251 },
  780.             { wxT("ugrave"),249 },
  781.             { wxT("uml"),168 },
  782.             { wxT("upsih"),978 },
  783.             { wxT("upsilon"),965 },
  784.             { wxT("uuml"),252 },
  785.             { wxT("weierp"),8472 },
  786.             { wxT("xi"),958 },
  787.             { wxT("yacute"),253 },
  788.             { wxT("yen"),165 },
  789.             { wxT("yuml"),255 },
  790.             { wxT("zeta"),950 },
  791.             { wxT("zwj"),8205 },
  792.             { wxT("zwnj"),8204 },
  793.             {NULL, 0}};
  794.         static size_t substitutions_cnt = 0;
  795.  
  796.         if (substitutions_cnt == 0)
  797.             while (substitutions[substitutions_cnt].code != 0)
  798.                 substitutions_cnt++;
  799.  
  800.         wxHtmlEntityInfo *info;
  801.         info = (wxHtmlEntityInfo*) bsearch(entity.c_str(), substitutions,
  802.                                            substitutions_cnt,
  803.                                            sizeof(wxHtmlEntityInfo),
  804.                                            wxHtmlEntityCompare);
  805.         if (info)
  806.             code = info->code;
  807.     }
  808.  
  809.     if (code == 0)
  810.         return 0;
  811.     else
  812.         return GetCharForCode(code);
  813. }
  814.  
  815. wxFSFile *wxHtmlParser::OpenURL(wxHtmlURLType WXUNUSED(type), 
  816.                                 const wxString& url) const
  817. {
  818.     return GetFS()->OpenFile(url);
  819. }
  820.  
  821. #endif
  822.