home *** CD-ROM | disk | FTP | other *** search
/ HTML Examples / WP.iso / wordpress / wp-includes / SimplePie / Decode / HTML / Entities.php
Encoding:
PHP Script  |  2012-11-21  |  16.9 KB  |  618 lines

  1. <?php
  2. /**
  3.  * SimplePie
  4.  *
  5.  * A PHP-Based RSS and Atom Feed Framework.
  6.  * Takes the hard work out of managing a complete RSS/Atom solution.
  7.  *
  8.  * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
  9.  * All rights reserved.
  10.  *
  11.  * Redistribution and use in source and binary forms, with or without modification, are
  12.  * permitted provided that the following conditions are met:
  13.  *
  14.  *     * Redistributions of source code must retain the above copyright notice, this list of
  15.  *       conditions and the following disclaimer.
  16.  *
  17.  *     * Redistributions in binary form must reproduce the above copyright notice, this list
  18.  *       of conditions and the following disclaimer in the documentation and/or other materials
  19.  *       provided with the distribution.
  20.  *
  21.  *     * Neither the name of the SimplePie Team nor the names of its contributors may be used
  22.  *       to endorse or promote products derived from this software without specific prior
  23.  *       written permission.
  24.  *
  25.  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
  26.  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
  27.  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
  28.  * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  29.  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  30.  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  31.  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  32.  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  33.  * POSSIBILITY OF SUCH DAMAGE.
  34.  *
  35.  * @package SimplePie
  36.  * @version 1.3.1
  37.  * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
  38.  * @author Ryan Parman
  39.  * @author Geoffrey Sneddon
  40.  * @author Ryan McCue
  41.  * @link http://simplepie.org/ SimplePie
  42.  * @license http://www.opensource.org/licenses/bsd-license.php BSD License
  43.  */
  44.  
  45.  
  46. /**
  47.  * Decode HTML Entities
  48.  *
  49.  * This implements HTML5 as of revision 967 (2007-06-28)
  50.  *
  51.  * @deprecated Use DOMDocument instead!
  52.  * @package SimplePie
  53.  */
  54. class SimplePie_Decode_HTML_Entities
  55. {
  56.     /**
  57.      * Data to be parsed
  58.      *
  59.      * @access private
  60.      * @var string
  61.      */
  62.     var $data = '';
  63.  
  64.     /**
  65.      * Currently consumed bytes
  66.      *
  67.      * @access private
  68.      * @var string
  69.      */
  70.     var $consumed = '';
  71.  
  72.     /**
  73.      * Position of the current byte being parsed
  74.      *
  75.      * @access private
  76.      * @var int
  77.      */
  78.     var $position = 0;
  79.  
  80.     /**
  81.      * Create an instance of the class with the input data
  82.      *
  83.      * @access public
  84.      * @param string $data Input data
  85.      */
  86.     public function __construct($data)
  87.     {
  88.         $this->data = $data;
  89.     }
  90.  
  91.     /**
  92.      * Parse the input data
  93.      *
  94.      * @access public
  95.      * @return string Output data
  96.      */
  97.     public function parse()
  98.     {
  99.         while (($this->position = strpos($this->data, '&', $this->position)) !== false)
  100.         {
  101.             $this->consume();
  102.             $this->entity();
  103.             $this->consumed = '';
  104.         }
  105.         return $this->data;
  106.     }
  107.  
  108.     /**
  109.      * Consume the next byte
  110.      *
  111.      * @access private
  112.      * @return mixed The next byte, or false, if there is no more data
  113.      */
  114.     public function consume()
  115.     {
  116.         if (isset($this->data[$this->position]))
  117.         {
  118.             $this->consumed .= $this->data[$this->position];
  119.             return $this->data[$this->position++];
  120.         }
  121.         else
  122.         {
  123.             return false;
  124.         }
  125.     }
  126.  
  127.     /**
  128.      * Consume a range of characters
  129.      *
  130.      * @access private
  131.      * @param string $chars Characters to consume
  132.      * @return mixed A series of characters that match the range, or false
  133.      */
  134.     public function consume_range($chars)
  135.     {
  136.         if ($len = strspn($this->data, $chars, $this->position))
  137.         {
  138.             $data = substr($this->data, $this->position, $len);
  139.             $this->consumed .= $data;
  140.             $this->position += $len;
  141.             return $data;
  142.         }
  143.         else
  144.         {
  145.             return false;
  146.         }
  147.     }
  148.  
  149.     /**
  150.      * Unconsume one byte
  151.      *
  152.      * @access private
  153.      */
  154.     public function unconsume()
  155.     {
  156.         $this->consumed = substr($this->consumed, 0, -1);
  157.         $this->position--;
  158.     }
  159.  
  160.     /**
  161.      * Decode an entity
  162.      *
  163.      * @access private
  164.      */
  165.     public function entity()
  166.     {
  167.         switch ($this->consume())
  168.         {
  169.             case "\x09":
  170.             case "\x0A":
  171.             case "\x0B":
  172.             case "\x0B":
  173.             case "\x0C":
  174.             case "\x20":
  175.             case "\x3C":
  176.             case "\x26":
  177.             case false:
  178.                 break;
  179.  
  180.             case "\x23":
  181.                 switch ($this->consume())
  182.                 {
  183.                     case "\x78":
  184.                     case "\x58":
  185.                         $range = '0123456789ABCDEFabcdef';
  186.                         $hex = true;
  187.                         break;
  188.  
  189.                     default:
  190.                         $range = '0123456789';
  191.                         $hex = false;
  192.                         $this->unconsume();
  193.                         break;
  194.                 }
  195.  
  196.                 if ($codepoint = $this->consume_range($range))
  197.                 {
  198.                     static $windows_1252_specials = array(0x0D => "\x0A", 0x80 => "\xE2\x82\xAC", 0x81 => "\xEF\xBF\xBD", 0x82 => "\xE2\x80\x9A", 0x83 => "\xC6\x92", 0x84 => "\xE2\x80\x9E", 0x85 => "\xE2\x80\xA6", 0x86 => "\xE2\x80\xA0", 0x87 => "\xE2\x80\xA1", 0x88 => "\xCB\x86", 0x89 => "\xE2\x80\xB0", 0x8A => "\xC5\xA0", 0x8B => "\xE2\x80\xB9", 0x8C => "\xC5\x92", 0x8D => "\xEF\xBF\xBD", 0x8E => "\xC5\xBD", 0x8F => "\xEF\xBF\xBD", 0x90 => "\xEF\xBF\xBD", 0x91 => "\xE2\x80\x98", 0x92 => "\xE2\x80\x99", 0x93 => "\xE2\x80\x9C", 0x94 => "\xE2\x80\x9D", 0x95 => "\xE2\x80\xA2", 0x96 => "\xE2\x80\x93", 0x97 => "\xE2\x80\x94", 0x98 => "\xCB\x9C", 0x99 => "\xE2\x84\xA2", 0x9A => "\xC5\xA1", 0x9B => "\xE2\x80\xBA", 0x9C => "\xC5\x93", 0x9D => "\xEF\xBF\xBD", 0x9E => "\xC5\xBE", 0x9F => "\xC5\xB8");
  199.  
  200.                     if ($hex)
  201.                     {
  202.                         $codepoint = hexdec($codepoint);
  203.                     }
  204.                     else
  205.                     {
  206.                         $codepoint = intval($codepoint);
  207.                     }
  208.  
  209.                     if (isset($windows_1252_specials[$codepoint]))
  210.                     {
  211.                         $replacement = $windows_1252_specials[$codepoint];
  212.                     }
  213.                     else
  214.                     {
  215.                         $replacement = SimplePie_Misc::codepoint_to_utf8($codepoint);
  216.                     }
  217.  
  218.                     if (!in_array($this->consume(), array(';', false), true))
  219.                     {
  220.                         $this->unconsume();
  221.                     }
  222.  
  223.                     $consumed_length = strlen($this->consumed);
  224.                     $this->data = substr_replace($this->data, $replacement, $this->position - $consumed_length, $consumed_length);
  225.                     $this->position += strlen($replacement) - $consumed_length;
  226.                 }
  227.                 break;
  228.  
  229.             default:
  230.                 static $entities = array(
  231.                     'Aacute' => "\xC3\x81",
  232.                     'aacute' => "\xC3\xA1",
  233.                     'Aacute;' => "\xC3\x81",
  234.                     'aacute;' => "\xC3\xA1",
  235.                     'Acirc' => "\xC3\x82",
  236.                     'acirc' => "\xC3\xA2",
  237.                     'Acirc;' => "\xC3\x82",
  238.                     'acirc;' => "\xC3\xA2",
  239.                     'acute' => "\xC2\xB4",
  240.                     'acute;' => "\xC2\xB4",
  241.                     'AElig' => "\xC3\x86",
  242.                     'aelig' => "\xC3\xA6",
  243.                     'AElig;' => "\xC3\x86",
  244.                     'aelig;' => "\xC3\xA6",
  245.                     'Agrave' => "\xC3\x80",
  246.                     'agrave' => "\xC3\xA0",
  247.                     'Agrave;' => "\xC3\x80",
  248.                     'agrave;' => "\xC3\xA0",
  249.                     'alefsym;' => "\xE2\x84\xB5",
  250.                     'Alpha;' => "\xCE\x91",
  251.                     'alpha;' => "\xCE\xB1",
  252.                     'AMP' => "\x26",
  253.                     'amp' => "\x26",
  254.                     'AMP;' => "\x26",
  255.                     'amp;' => "\x26",
  256.                     'and;' => "\xE2\x88\xA7",
  257.                     'ang;' => "\xE2\x88\xA0",
  258.                     'apos;' => "\x27",
  259.                     'Aring' => "\xC3\x85",
  260.                     'aring' => "\xC3\xA5",
  261.                     'Aring;' => "\xC3\x85",
  262.                     'aring;' => "\xC3\xA5",
  263.                     'asymp;' => "\xE2\x89\x88",
  264.                     'Atilde' => "\xC3\x83",
  265.                     'atilde' => "\xC3\xA3",
  266.                     'Atilde;' => "\xC3\x83",
  267.                     'atilde;' => "\xC3\xA3",
  268.                     'Auml' => "\xC3\x84",
  269.                     'auml' => "\xC3\xA4",
  270.                     'Auml;' => "\xC3\x84",
  271.                     'auml;' => "\xC3\xA4",
  272.                     'bdquo;' => "\xE2\x80\x9E",
  273.                     'Beta;' => "\xCE\x92",
  274.                     'beta;' => "\xCE\xB2",
  275.                     'brvbar' => "\xC2\xA6",
  276.                     'brvbar;' => "\xC2\xA6",
  277.                     'bull;' => "\xE2\x80\xA2",
  278.                     'cap;' => "\xE2\x88\xA9",
  279.                     'Ccedil' => "\xC3\x87",
  280.                     'ccedil' => "\xC3\xA7",
  281.                     'Ccedil;' => "\xC3\x87",
  282.                     'ccedil;' => "\xC3\xA7",
  283.                     'cedil' => "\xC2\xB8",
  284.                     'cedil;' => "\xC2\xB8",
  285.                     'cent' => "\xC2\xA2",
  286.                     'cent;' => "\xC2\xA2",
  287.                     'Chi;' => "\xCE\xA7",
  288.                     'chi;' => "\xCF\x87",
  289.                     'circ;' => "\xCB\x86",
  290.                     'clubs;' => "\xE2\x99\xA3",
  291.                     'cong;' => "\xE2\x89\x85",
  292.                     'COPY' => "\xC2\xA9",
  293.                     'copy' => "\xC2\xA9",
  294.                     'COPY;' => "\xC2\xA9",
  295.                     'copy;' => "\xC2\xA9",
  296.                     'crarr;' => "\xE2\x86\xB5",
  297.                     'cup;' => "\xE2\x88\xAA",
  298.                     'curren' => "\xC2\xA4",
  299.                     'curren;' => "\xC2\xA4",
  300.                     'Dagger;' => "\xE2\x80\xA1",
  301.                     'dagger;' => "\xE2\x80\xA0",
  302.                     'dArr;' => "\xE2\x87\x93",
  303.                     'darr;' => "\xE2\x86\x93",
  304.                     'deg' => "\xC2\xB0",
  305.                     'deg;' => "\xC2\xB0",
  306.                     'Delta;' => "\xCE\x94",
  307.                     'delta;' => "\xCE\xB4",
  308.                     'diams;' => "\xE2\x99\xA6",
  309.                     'divide' => "\xC3\xB7",
  310.                     'divide;' => "\xC3\xB7",
  311.                     'Eacute' => "\xC3\x89",
  312.                     'eacute' => "\xC3\xA9",
  313.                     'Eacute;' => "\xC3\x89",
  314.                     'eacute;' => "\xC3\xA9",
  315.                     'Ecirc' => "\xC3\x8A",
  316.                     'ecirc' => "\xC3\xAA",
  317.                     'Ecirc;' => "\xC3\x8A",
  318.                     'ecirc;' => "\xC3\xAA",
  319.                     'Egrave' => "\xC3\x88",
  320.                     'egrave' => "\xC3\xA8",
  321.                     'Egrave;' => "\xC3\x88",
  322.                     'egrave;' => "\xC3\xA8",
  323.                     'empty;' => "\xE2\x88\x85",
  324.                     'emsp;' => "\xE2\x80\x83",
  325.                     'ensp;' => "\xE2\x80\x82",
  326.                     'Epsilon;' => "\xCE\x95",
  327.                     'epsilon;' => "\xCE\xB5",
  328.                     'equiv;' => "\xE2\x89\xA1",
  329.                     'Eta;' => "\xCE\x97",
  330.                     'eta;' => "\xCE\xB7",
  331.                     'ETH' => "\xC3\x90",
  332.                     'eth' => "\xC3\xB0",
  333.                     'ETH;' => "\xC3\x90",
  334.                     'eth;' => "\xC3\xB0",
  335.                     'Euml' => "\xC3\x8B",
  336.                     'euml' => "\xC3\xAB",
  337.                     'Euml;' => "\xC3\x8B",
  338.                     'euml;' => "\xC3\xAB",
  339.                     'euro;' => "\xE2\x82\xAC",
  340.                     'exist;' => "\xE2\x88\x83",
  341.                     'fnof;' => "\xC6\x92",
  342.                     'forall;' => "\xE2\x88\x80",
  343.                     'frac12' => "\xC2\xBD",
  344.                     'frac12;' => "\xC2\xBD",
  345.                     'frac14' => "\xC2\xBC",
  346.                     'frac14;' => "\xC2\xBC",
  347.                     'frac34' => "\xC2\xBE",
  348.                     'frac34;' => "\xC2\xBE",
  349.                     'frasl;' => "\xE2\x81\x84",
  350.                     'Gamma;' => "\xCE\x93",
  351.                     'gamma;' => "\xCE\xB3",
  352.                     'ge;' => "\xE2\x89\xA5",
  353.                     'GT' => "\x3E",
  354.                     'gt' => "\x3E",
  355.                     'GT;' => "\x3E",
  356.                     'gt;' => "\x3E",
  357.                     'hArr;' => "\xE2\x87\x94",
  358.                     'harr;' => "\xE2\x86\x94",
  359.                     'hearts;' => "\xE2\x99\xA5",
  360.                     'hellip;' => "\xE2\x80\xA6",
  361.                     'Iacute' => "\xC3\x8D",
  362.                     'iacute' => "\xC3\xAD",
  363.                     'Iacute;' => "\xC3\x8D",
  364.                     'iacute;' => "\xC3\xAD",
  365.                     'Icirc' => "\xC3\x8E",
  366.                     'icirc' => "\xC3\xAE",
  367.                     'Icirc;' => "\xC3\x8E",
  368.                     'icirc;' => "\xC3\xAE",
  369.                     'iexcl' => "\xC2\xA1",
  370.                     'iexcl;' => "\xC2\xA1",
  371.                     'Igrave' => "\xC3\x8C",
  372.                     'igrave' => "\xC3\xAC",
  373.                     'Igrave;' => "\xC3\x8C",
  374.                     'igrave;' => "\xC3\xAC",
  375.                     'image;' => "\xE2\x84\x91",
  376.                     'infin;' => "\xE2\x88\x9E",
  377.                     'int;' => "\xE2\x88\xAB",
  378.                     'Iota;' => "\xCE\x99",
  379.                     'iota;' => "\xCE\xB9",
  380.                     'iquest' => "\xC2\xBF",
  381.                     'iquest;' => "\xC2\xBF",
  382.                     'isin;' => "\xE2\x88\x88",
  383.                     'Iuml' => "\xC3\x8F",
  384.                     'iuml' => "\xC3\xAF",
  385.                     'Iuml;' => "\xC3\x8F",
  386.                     'iuml;' => "\xC3\xAF",
  387.                     'Kappa;' => "\xCE\x9A",
  388.                     'kappa;' => "\xCE\xBA",
  389.                     'Lambda;' => "\xCE\x9B",
  390.                     'lambda;' => "\xCE\xBB",
  391.                     'lang;' => "\xE3\x80\x88",
  392.                     'laquo' => "\xC2\xAB",
  393.                     'laquo;' => "\xC2\xAB",
  394.                     'lArr;' => "\xE2\x87\x90",
  395.                     'larr;' => "\xE2\x86\x90",
  396.                     'lceil;' => "\xE2\x8C\x88",
  397.                     'ldquo;' => "\xE2\x80\x9C",
  398.                     'le;' => "\xE2\x89\xA4",
  399.                     'lfloor;' => "\xE2\x8C\x8A",
  400.                     'lowast;' => "\xE2\x88\x97",
  401.                     'loz;' => "\xE2\x97\x8A",
  402.                     'lrm;' => "\xE2\x80\x8E",
  403.                     'lsaquo;' => "\xE2\x80\xB9",
  404.                     'lsquo;' => "\xE2\x80\x98",
  405.                     'LT' => "\x3C",
  406.                     'lt' => "\x3C",
  407.                     'LT;' => "\x3C",
  408.                     'lt;' => "\x3C",
  409.                     'macr' => "\xC2\xAF",
  410.                     'macr;' => "\xC2\xAF",
  411.                     'mdash;' => "\xE2\x80\x94",
  412.                     'micro' => "\xC2\xB5",
  413.                     'micro;' => "\xC2\xB5",
  414.                     'middot' => "\xC2\xB7",
  415.                     'middot;' => "\xC2\xB7",
  416.                     'minus;' => "\xE2\x88\x92",
  417.                     'Mu;' => "\xCE\x9C",
  418.                     'mu;' => "\xCE\xBC",
  419.                     'nabla;' => "\xE2\x88\x87",
  420.                     'nbsp' => "\xC2\xA0",
  421.                     'nbsp;' => "\xC2\xA0",
  422.                     'ndash;' => "\xE2\x80\x93",
  423.                     'ne;' => "\xE2\x89\xA0",
  424.                     'ni;' => "\xE2\x88\x8B",
  425.                     'not' => "\xC2\xAC",
  426.                     'not;' => "\xC2\xAC",
  427.                     'notin;' => "\xE2\x88\x89",
  428.                     'nsub;' => "\xE2\x8A\x84",
  429.                     'Ntilde' => "\xC3\x91",
  430.                     'ntilde' => "\xC3\xB1",
  431.                     'Ntilde;' => "\xC3\x91",
  432.                     'ntilde;' => "\xC3\xB1",
  433.                     'Nu;' => "\xCE\x9D",
  434.                     'nu;' => "\xCE\xBD",
  435.                     'Oacute' => "\xC3\x93",
  436.                     'oacute' => "\xC3\xB3",
  437.                     'Oacute;' => "\xC3\x93",
  438.                     'oacute;' => "\xC3\xB3",
  439.                     'Ocirc' => "\xC3\x94",
  440.                     'ocirc' => "\xC3\xB4",
  441.                     'Ocirc;' => "\xC3\x94",
  442.                     'ocirc;' => "\xC3\xB4",
  443.                     'OElig;' => "\xC5\x92",
  444.                     'oelig;' => "\xC5\x93",
  445.                     'Ograve' => "\xC3\x92",
  446.                     'ograve' => "\xC3\xB2",
  447.                     'Ograve;' => "\xC3\x92",
  448.                     'ograve;' => "\xC3\xB2",
  449.                     'oline;' => "\xE2\x80\xBE",
  450.                     'Omega;' => "\xCE\xA9",
  451.                     'omega;' => "\xCF\x89",
  452.                     'Omicron;' => "\xCE\x9F",
  453.                     'omicron;' => "\xCE\xBF",
  454.                     'oplus;' => "\xE2\x8A\x95",
  455.                     'or;' => "\xE2\x88\xA8",
  456.                     'ordf' => "\xC2\xAA",
  457.                     'ordf;' => "\xC2\xAA",
  458.                     'ordm' => "\xC2\xBA",
  459.                     'ordm;' => "\xC2\xBA",
  460.                     'Oslash' => "\xC3\x98",
  461.                     'oslash' => "\xC3\xB8",
  462.                     'Oslash;' => "\xC3\x98",
  463.                     'oslash;' => "\xC3\xB8",
  464.                     'Otilde' => "\xC3\x95",
  465.                     'otilde' => "\xC3\xB5",
  466.                     'Otilde;' => "\xC3\x95",
  467.                     'otilde;' => "\xC3\xB5",
  468.                     'otimes;' => "\xE2\x8A\x97",
  469.                     'Ouml' => "\xC3\x96",
  470.                     'ouml' => "\xC3\xB6",
  471.                     'Ouml;' => "\xC3\x96",
  472.                     'ouml;' => "\xC3\xB6",
  473.                     'para' => "\xC2\xB6",
  474.                     'para;' => "\xC2\xB6",
  475.                     'part;' => "\xE2\x88\x82",
  476.                     'permil;' => "\xE2\x80\xB0",
  477.                     'perp;' => "\xE2\x8A\xA5",
  478.                     'Phi;' => "\xCE\xA6",
  479.                     'phi;' => "\xCF\x86",
  480.                     'Pi;' => "\xCE\xA0",
  481.                     'pi;' => "\xCF\x80",
  482.                     'piv;' => "\xCF\x96",
  483.                     'plusmn' => "\xC2\xB1",
  484.                     'plusmn;' => "\xC2\xB1",
  485.                     'pound' => "\xC2\xA3",
  486.                     'pound;' => "\xC2\xA3",
  487.                     'Prime;' => "\xE2\x80\xB3",
  488.                     'prime;' => "\xE2\x80\xB2",
  489.                     'prod;' => "\xE2\x88\x8F",
  490.                     'prop;' => "\xE2\x88\x9D",
  491.                     'Psi;' => "\xCE\xA8",
  492.                     'psi;' => "\xCF\x88",
  493.                     'QUOT' => "\x22",
  494.                     'quot' => "\x22",
  495.                     'QUOT;' => "\x22",
  496.                     'quot;' => "\x22",
  497.                     'radic;' => "\xE2\x88\x9A",
  498.                     'rang;' => "\xE3\x80\x89",
  499.                     'raquo' => "\xC2\xBB",
  500.                     'raquo;' => "\xC2\xBB",
  501.                     'rArr;' => "\xE2\x87\x92",
  502.                     'rarr;' => "\xE2\x86\x92",
  503.                     'rceil;' => "\xE2\x8C\x89",
  504.                     'rdquo;' => "\xE2\x80\x9D",
  505.                     'real;' => "\xE2\x84\x9C",
  506.                     'REG' => "\xC2\xAE",
  507.                     'reg' => "\xC2\xAE",
  508.                     'REG;' => "\xC2\xAE",
  509.                     'reg;' => "\xC2\xAE",
  510.                     'rfloor;' => "\xE2\x8C\x8B",
  511.                     'Rho;' => "\xCE\xA1",
  512.                     'rho;' => "\xCF\x81",
  513.                     'rlm;' => "\xE2\x80\x8F",
  514.                     'rsaquo;' => "\xE2\x80\xBA",
  515.                     'rsquo;' => "\xE2\x80\x99",
  516.                     'sbquo;' => "\xE2\x80\x9A",
  517.                     'Scaron;' => "\xC5\xA0",
  518.                     'scaron;' => "\xC5\xA1",
  519.                     'sdot;' => "\xE2\x8B\x85",
  520.                     'sect' => "\xC2\xA7",
  521.                     'sect;' => "\xC2\xA7",
  522.                     'shy' => "\xC2\xAD",
  523.                     'shy;' => "\xC2\xAD",
  524.                     'Sigma;' => "\xCE\xA3",
  525.                     'sigma;' => "\xCF\x83",
  526.                     'sigmaf;' => "\xCF\x82",
  527.                     'sim;' => "\xE2\x88\xBC",
  528.                     'spades;' => "\xE2\x99\xA0",
  529.                     'sub;' => "\xE2\x8A\x82",
  530.                     'sube;' => "\xE2\x8A\x86",
  531.                     'sum;' => "\xE2\x88\x91",
  532.                     'sup;' => "\xE2\x8A\x83",
  533.                     'sup1' => "\xC2\xB9",
  534.                     'sup1;' => "\xC2\xB9",
  535.                     'sup2' => "\xC2\xB2",
  536.                     'sup2;' => "\xC2\xB2",
  537.                     'sup3' => "\xC2\xB3",
  538.                     'sup3;' => "\xC2\xB3",
  539.                     'supe;' => "\xE2\x8A\x87",
  540.                     'szlig' => "\xC3\x9F",
  541.                     'szlig;' => "\xC3\x9F",
  542.                     'Tau;' => "\xCE\xA4",
  543.                     'tau;' => "\xCF\x84",
  544.                     'there4;' => "\xE2\x88\xB4",
  545.                     'Theta;' => "\xCE\x98",
  546.                     'theta;' => "\xCE\xB8",
  547.                     'thetasym;' => "\xCF\x91",
  548.                     'thinsp;' => "\xE2\x80\x89",
  549.                     'THORN' => "\xC3\x9E",
  550.                     'thorn' => "\xC3\xBE",
  551.                     'THORN;' => "\xC3\x9E",
  552.                     'thorn;' => "\xC3\xBE",
  553.                     'tilde;' => "\xCB\x9C",
  554.                     'times' => "\xC3\x97",
  555.                     'times;' => "\xC3\x97",
  556.                     'TRADE;' => "\xE2\x84\xA2",
  557.                     'trade;' => "\xE2\x84\xA2",
  558.                     'Uacute' => "\xC3\x9A",
  559.                     'uacute' => "\xC3\xBA",
  560.                     'Uacute;' => "\xC3\x9A",
  561.                     'uacute;' => "\xC3\xBA",
  562.                     'uArr;' => "\xE2\x87\x91",
  563.                     'uarr;' => "\xE2\x86\x91",
  564.                     'Ucirc' => "\xC3\x9B",
  565.                     'ucirc' => "\xC3\xBB",
  566.                     'Ucirc;' => "\xC3\x9B",
  567.                     'ucirc;' => "\xC3\xBB",
  568.                     'Ugrave' => "\xC3\x99",
  569.                     'ugrave' => "\xC3\xB9",
  570.                     'Ugrave;' => "\xC3\x99",
  571.                     'ugrave;' => "\xC3\xB9",
  572.                     'uml' => "\xC2\xA8",
  573.                     'uml;' => "\xC2\xA8",
  574.                     'upsih;' => "\xCF\x92",
  575.                     'Upsilon;' => "\xCE\xA5",
  576.                     'upsilon;' => "\xCF\x85",
  577.                     'Uuml' => "\xC3\x9C",
  578.                     'uuml' => "\xC3\xBC",
  579.                     'Uuml;' => "\xC3\x9C",
  580.                     'uuml;' => "\xC3\xBC",
  581.                     'weierp;' => "\xE2\x84\x98",
  582.                     'Xi;' => "\xCE\x9E",
  583.                     'xi;' => "\xCE\xBE",
  584.                     'Yacute' => "\xC3\x9D",
  585.                     'yacute' => "\xC3\xBD",
  586.                     'Yacute;' => "\xC3\x9D",
  587.                     'yacute;' => "\xC3\xBD",
  588.                     'yen' => "\xC2\xA5",
  589.                     'yen;' => "\xC2\xA5",
  590.                     'yuml' => "\xC3\xBF",
  591.                     'Yuml;' => "\xC5\xB8",
  592.                     'yuml;' => "\xC3\xBF",
  593.                     'Zeta;' => "\xCE\x96",
  594.                     'zeta;' => "\xCE\xB6",
  595.                     'zwj;' => "\xE2\x80\x8D",
  596.                     'zwnj;' => "\xE2\x80\x8C"
  597.                 );
  598.  
  599.                 for ($i = 0, $match = null; $i < 9 && $this->consume() !== false; $i++)
  600.                 {
  601.                     $consumed = substr($this->consumed, 1);
  602.                     if (isset($entities[$consumed]))
  603.                     {
  604.                         $match = $consumed;
  605.                     }
  606.                 }
  607.  
  608.                 if ($match !== null)
  609.                 {
  610.                      $this->data = substr_replace($this->data, $entities[$match], $this->position - strlen($consumed) - 1, strlen($match) + 1);
  611.                     $this->position += strlen($entities[$match]) - strlen($consumed) - 1;
  612.                 }
  613.                 break;
  614.         }
  615.     }
  616. }
  617.  
  618.