home *** CD-ROM | disk | FTP | other *** search
/ AmigActive 6 / AACD06.ISO / AACD / Programming / ICU / src / icu / source / i18n / chbkdat.cpp < prev    next >
Encoding:
C/C++ Source or Header  |  1999-08-16  |  17.5 KB  |  300 lines

  1. /*
  2. *****************************************************************************************
  3. *                                                                                       *
  4. * COPYRIGHT:                                                                            *
  5. *   (C) Copyright Taligent, Inc.,  1997                                                 *
  6. *   (C) Copyright International Business Machines Corporation,  1997-1998                    *
  7. *   Licensed Material - Program-Property of IBM - All Rights Reserved.                  *
  8. *   US Government Users Restricted Rights - Use, duplication, or disclosure             *
  9. *   restricted by GSA ADP Schedule Contract with IBM Corp.                              *
  10. *                                                                                       *
  11. *****************************************************************************************
  12. *
  13. * File CHBKDAT.CPP
  14. *
  15. * Modification History:
  16. *
  17. *   Date        Name        Description
  18. *   02/18/97    aliu        Converted from OpenClass.
  19. *                           Recoded kRawMapping table for Unicode::getType() type codes.
  20. *                           Made static data members const where appropriate.
  21. *   03/25/97    aliu        Moved into TextBoundaryData; no longer a subclass.
  22. *   04/15/97    aliu        Worked around bug in AIX xlC compiler which occurs if static
  23. *                           arrays contain const elements.
  24. *   05/06/97    aliu        Made SpecialMapping an array of objects instead of pointers,
  25. *                           to help out non-compliant compilers.
  26. *   08/14/98    helena      Sync-up JDK1.2.
  27. *   07/12/99    helena      HPUX 11 CC port.
  28. *****************************************************************************************
  29. */
  30.  
  31. // *****************************************************************************
  32. // This file was generated from the java source file CharacterBreakData.java
  33. // *****************************************************************************
  34.  
  35. #include "txtbdat.h"
  36. #include "wdbktbl.h"
  37. #include "unicdcm.h"
  38. // *****************************************************************************
  39. // class CharacterBreakData
  40. // The following tables contain the state-transition data for character breaks.
  41. // Taking the forward data as an example, the state machine looks like this:
  42. //    Diagram 1 : the forward state machine for accent and base
  43. //
  44. //                          accent
  45. //                           ----
  46. //            accent  +----+/    \
  47. //           -------> |SI+2|      |
  48. //          /         +----+<----/ 
  49. //    +----+            |        base       +-------+
  50. // 0->|stop|            +-----------------> |SI_stop|
  51. //    +----+\-------> +----+--------------> +-------+
  52. //            base    |SI+2|     base       
  53. //                    +----+
  54. //                    ^    \
  55. //                    |     |
  56. //                    \----/
  57. //                    accent
  58. //
  59. // *****************************************************************************
  60. // The forward transition states of character boundary data.
  // Flat, row-major table that is handed to the WordBreakTable constructor further
  // down together with kCharacterCol_count. As written here it holds 8 rows (row 0
  // plus the rows labelled 1-7) of 8 entries each; the column order is the one given
  // by the two header comment lines inside the initializer.
  // NOTE(review): the row notes below assume kSI_n means "continue in row n" and
  // that kStop/kSI_Stop end the scan -- the Node constants are declared elsewhere,
  // so confirm against txtbdat.h.
  61. TextBoundaryData::Node TextBoundaryData::kCharacterForwardData[] = {
  62.         // acct         base            cr              lf
  63.         // cho          jung            jong            EOS
  // (row 0: every column is kStop)
  64.         kStop,          kStop,          kStop,          kStop,
  65.         kStop,          kStop,          kStop,          kStop,
  66.  
  67.         // 1: acct/base -> 2, cr -> 3, lf -> 7, cho -> 4, jung -> 5, jong -> 6; EOS stops
  68.         kSI_2,          kSI_2,          kSI_3,          kSI_7,
  69.         kSI_4,          kSI_5,          kSI_6,          kSI_Stop,
  70.  
  71.         // 2: a further acct stays in row 2; every other column stops
  72.         kSI_2,          kSI_Stop,       kSI_Stop,       kSI_Stop,
  73.         kSI_Stop,       kSI_Stop,       kSI_Stop,       kSI_Stop,
  74.  
  75.         // 3: entered on cr; only a following lf continues (to row 7)
  76.         kSI_Stop,       kSI_Stop,       kSI_Stop,       kSI_7,
  77.         kSI_Stop,       kSI_Stop,       kSI_Stop,       kSI_Stop,
  78.  
  79.         // 4: entered on cho; continues on acct, cho, jung or jong
  80.         kSI_2,          kSI_Stop,       kSI_Stop,       kSI_Stop,
  81.         kSI_4,          kSI_5,          kSI_6,          kSI_Stop,
  82.  
  83.         // 5: entered on jung; continues on acct, jung or jong
  84.         kSI_2,          kSI_Stop,       kSI_Stop,       kSI_Stop,
  85.         kSI_Stop,       kSI_5,          kSI_6,          kSI_Stop,
  86.  
  87.         // 6: entered on jong; continues on acct or jong
  88.         kSI_2,          kSI_Stop,       kSI_Stop,       kSI_Stop,
  89.         kSI_Stop,       kSI_Stop,       kSI_6,          kSI_Stop,
  90.  
  91.         // 7: entered on lf; every column stops
  92.         kSI_Stop,       kSI_Stop,       kSI_Stop,       kSI_Stop,
  93.         kSI_Stop,       kSI_Stop,       kSI_Stop,       kSI_Stop
  94. };
  95.  
  // Number of Node entries in kCharacterForwardData, derived from the array itself
  // with sizeof so it tracks any edit to the initializer.
  96. const int32_t TextBoundaryData::kCharacterForwardData_length =
  97.     sizeof(TextBoundaryData::kCharacterForwardData) / sizeof(TextBoundaryData::kCharacterForwardData[0]);
  98.  
  // Forward break table: heap-allocated during static initialization from the column
  // count, the flat forward data and its length. No matching delete appears in this file.
  99. WordBreakTable* TextBoundaryData::kCharacterForward = new WordBreakTable(kCharacterCol_count, kCharacterForwardData, kCharacterForwardData_length);
  100.  
  101.  
  102. // *****************************************************************************
  103. //
  104. //    Diagram 2 : the backward state machine for accent and base
  105. //
  106. //                          accent
  107. //                           ----
  108. //            accent  +----+/    \
  109. //           -------> |SI+1|      |
  110. //          /         +----+<----/ 
  111. //    +----+            |        base       +-------+
  112. // 0->|stop|            +-----------------> |SI_stop|
  113. //    +----+\-----------------------------> +-------+
  114. //                      base       
  115. //
  116. // *****************************************************************************
  117. // The backward transition states of character boundary data.
  // Flat, row-major table with the same 8-column layout as the forward data, but only
  // two rows: row 0 (all kStop) and row 1.
  // NOTE(review): assumes kSI_1 means "stay in row 1" and kStop/kSI_Stop end the
  // scan -- the Node constants are declared elsewhere; confirm against txtbdat.h.
  118. TextBoundaryData::Node TextBoundaryData::kCharacterBackwardData[] = {
  119.         // acct         base            cr              lf
  120.         // cho          jung            jong            EOS
  121.         kStop,          kStop,          kStop,          kStop,
  122.         kStop,          kStop,          kStop,          kStop,
  123.  
  124.         // 1: acct, lf, jung and jong stay in row 1; base, cr, cho and EOS stop
  125.         kSI_1,          kSI_Stop,       kSI_Stop,       kSI_1,
  126.         kSI_Stop,       kSI_1,          kSI_1,          kSI_Stop
  127. };
  128.  
  // Number of Node entries in kCharacterBackwardData, derived from the array itself
  // with sizeof so it tracks any edit to the initializer.
  129. const int32_t TextBoundaryData::kCharacterBackwardData_length =
  130.     sizeof(TextBoundaryData::kCharacterBackwardData) / sizeof(TextBoundaryData::kCharacterBackwardData[0]);
  131.  
  // Backward break table: heap-allocated during static initialization from the column
  // count, the flat backward data and its length. No matching delete appears in this file.
  132. WordBreakTable* TextBoundaryData::kCharacterBackward = new WordBreakTable(kCharacterCol_count, kCharacterBackwardData, kCharacterBackwardData_length);
  133.  
  134. // The character type mapping of the break table.
  // One entry per character type code, in the numeric order shown by the trailing
  // comments (0-28). Only the two mark types at indices 6 and 7 map to
  // kAccent_diacritic; every other type maps to kBaseForm.
  135. TextBoundaryData::Type TextBoundaryData::kCharacterRawMapping[] = {
  136.     // Re-coded to match Unicode 2 types [LIU]
  137.     kBaseForm,          // UNASSIGNED               = 0,
  138.     kBaseForm,          // UPPERCASE_LETTER         = 1,
  139.     kBaseForm,          // LOWERCASE_LETTER         = 2,
  140.     kBaseForm,          // TITLECASE_LETTER         = 3,
  141.     kBaseForm,          // MODIFIER_LETTER          = 4,
  142.     kBaseForm,          // OTHER_LETTER             = 5,
  143.     kAccent_diacritic,  // NON_SPACING_MARK         = 6,
  144.     kAccent_diacritic,  // ENCLOSING_MARK           = 7,
  145.     kBaseForm,          // COMBINING_SPACING_MARK   = 8,
  146.     kBaseForm,          // DECIMAL_DIGIT_NUMBER     = 9,
  147.     kBaseForm,          // LETTER_NUMBER            = 10,
  148.     kBaseForm,          // OTHER_NUMBER             = 11,
  149.     kBaseForm,          // SPACE_SEPARATOR          = 12,
  150.     kBaseForm,          // LINE_SEPARATOR           = 13,
  151.     kBaseForm,          // PARAGRAPH_SEPARATOR      = 14,
  152.     kBaseForm,          // CONTROL                  = 15,
  153.     kBaseForm,          // FORMAT                   = 16,
  154.     kBaseForm,          // PRIVATE_USE              = 17,
  155.     kBaseForm,          // SURROGATE                = 18,
  156.     kBaseForm,          // DASH_PUNCTUATION         = 19,
  157.     kBaseForm,          // START_PUNCTUATION        = 20,
  158.     kBaseForm,          // END_PUNCTUATION          = 21,
  159.     kBaseForm,          // CONNECTOR_PUNCTUATION    = 22,
  160.     kBaseForm,          // OTHER_PUNCTUATION        = 23,
  161.     kBaseForm,          // MATH_SYMBOL              = 24,
  162.     kBaseForm,          // CURRENCY_SYMBOL          = 25,
  163.     kBaseForm,          // MODIFIER_SYMBOL          = 26,
  164.     kBaseForm,          // OTHER_SYMBOL             = 27,
  165.     kBaseForm           // UNDEFINED                = 28
  166. };
  167.  
  // Number of entries in kCharacterRawMapping, derived from the array itself with
  // sizeof so it tracks any edit to the initializer.
  168. const int32_t TextBoundaryData::kCharacterRawMapping_length =
  169.     sizeof(TextBoundaryData::kCharacterRawMapping) / sizeof(TextBoundaryData::kCharacterRawMapping[0]);
  170.  
  // Per-character overrides passed to UnicodeClassMapping alongside the raw type
  // mapping. Two-argument entries name a single character and its break type;
  // three-argument entries name a low/high character pair and the break type.
  // NOTE(review): the low/high pair is presumably an inclusive range -- confirm
  // against the SpecialMapping declaration.
  171. SpecialMapping TextBoundaryData::kCharacterExceptionChar[] = {
  172.         SpecialMapping(TextBoundaryData::ASCII_LINEFEED, TextBoundaryData::kBaseLF),
  173.         SpecialMapping(TextBoundaryData::ASCII_CARRIAGE_RETURN, TextBoundaryData::kBaseCR),
  174.         SpecialMapping(TextBoundaryData::HANGUL_CHOSEONG_LOW, TextBoundaryData::HANGUL_CHOSEONG_HIGH, TextBoundaryData::kChoseong),
  175.         SpecialMapping(TextBoundaryData::HANGUL_JUNGSEONG_LOW, TextBoundaryData::HANGUL_JUNGSEONG_HIGH, TextBoundaryData::kJungseong),
  176.         SpecialMapping(TextBoundaryData::HANGUL_JONGSEONG_LOW, TextBoundaryData::HANGUL_JONGSEONG_HIGH, TextBoundaryData::kJongseong),
  // The line separator .. paragraph separator pair is given the same type as line feed.
  177.         SpecialMapping(TextBoundaryData::PUNCTUATION_LINE_SEPARATOR, TextBoundaryData::PUNCTUATION_PARAGRAPH_SEPARATOR, TextBoundaryData::kBaseLF),
  178.         SpecialMapping(TextBoundaryData::END_OF_STRING, TextBoundaryData::kEOS)
  179. };
  180.  
  // Number of SpecialMapping entries in kCharacterExceptionChar, derived from the
  // array itself with sizeof so it tracks any edit to the initializer.
  181. const int32_t TextBoundaryData::kCharacterExceptionChar_length = 
  182.     sizeof(TextBoundaryData::kCharacterExceptionChar) / sizeof(TextBoundaryData::kCharacterExceptionChar[0]);
  183.  
  // One flag per character type code (0-28), in the same order as
  // kCharacterRawMapping. TRUE is set only for indices 5, 13, 14 and 15.
  // NOTE(review): presumably TRUE marks the types whose characters may appear in
  // kCharacterExceptionChar and therefore need the exception lookup -- confirm
  // against UnicodeClassMapping. The array is passed to it without a length.
  184. const bool_t TextBoundaryData::kCharacterExceptionFlags[] = {
  185.         FALSE,          // kNonCharacter            = 0,
  186.         FALSE,          // kUppercaseLetter         = 1,
  187.         FALSE,          // kLowercaseLetter         = 2,
  188.         FALSE,          // kTitlecaseLetter         = 3,
  189.         FALSE,          // kModifierLetter          = 4,
  190.         TRUE,           // kOtherLetter             = 5,
  191.         FALSE,          // kNonSpacingMark          = 6,
  192.         FALSE,          // kEnclosingMark           = 7,
  193.         FALSE,          // kCombiningSpacingMark    = 8,
  194.         FALSE,          // kDecimalNumber           = 9,
  195.         FALSE,          // kLetterNumber            = 10,
  196.         FALSE,          // kOtherNumber             = 11,
  197.         FALSE,          // kSpaceSeparator          = 12,
  198.         TRUE,           // kLineSeparator           = 13,
  199.         TRUE,           // kParagraphSeparator      = 14,
  200.         TRUE,           // kControlCharacter        = 15,
  201.         FALSE,          // kFormatCharacter         = 16,
  202.         FALSE,          // kPrivateUseCharacter     = 17,
  203.         FALSE,          // kSurrogate               = 18,
  204.         FALSE,          // kDashPunctuation         = 19,
  205.         FALSE,          // kOpenPunctuation         = 20,
  206.         FALSE,          // kClosePunctuation        = 21,
  207.         FALSE,          // kConnectorPunctuation    = 22,
  208.         FALSE,          // kOtherPunctuation        = 23,
  209.         FALSE,          // kMathSymbol              = 24,
  210.         FALSE,          // kCurrencySymbol          = 25,
  211.         FALSE,          // kModifierSymbol          = 26,
  212.         FALSE,          // kOtherSymbol             = 27,
  213.         FALSE           // UNDEFINED                = 28
  214.     };
  215.  
  // Direct break-type table with 256 entries (32 rows of 8), one per character code
  // 0x00-0xFF -- despite the name it covers the Latin-1 range, not just ASCII.
  // Every entry is kBaseForm except index 10 (lf -> kBaseLF) and index 13
  // (cr -> kBaseCR). The comment line above each row labels its eight characters.
  // The array is passed to UnicodeClassMapping without a length.
  216. TextBoundaryData::Type TextBoundaryData::kCharacterAsciiValues[] = {
  217.         //  null       soh        stx        etx        eot        enq        ack        bell
  218.             kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
  219.         //  bs         ht         lf         vt         ff         cr         so         si
  220.             kBaseForm, kBaseForm, kBaseLF,   kBaseForm, kBaseForm, kBaseCR,   kBaseForm, kBaseForm,
  221.         //  dle        dc1        dc2        dc3        dc4        nak        syn        etb
  222.             kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
  223.         //  can        em         sub        esc        fs         gs         rs         us
  224.             kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
  225.         //  sp         !          "          #          $          %          &          '
  226.             kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
  227.         //  (          )          *          +          ,          -          .          /
  228.             kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
  229.         //  0          1          2          3          4          5          6          7
  230.             kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
  231.         //  8          9          :          ;          <          =          >          ?
  232.             kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
  233.         //  @          A          B          C          D          E          F          G
  234.             kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
  235.         //  H          I          J          K          L          M          N          O
  236.             kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
  237.         //  P          Q          R          S          T          U          V          W
  238.             kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
  239.         //  X          Y          Z          [          \          ]          ^          _
  240.             kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
  241.         //  `          a          b          c          d          e          f          g
  242.             kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
  243.         //  h          i          j          k          l          m          n          o
  244.             kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
  245.         //  p          q          r          s          t          u          v          w
  246.             kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
  247.         //  x          y          z          {          |          }          ~          del
  248.             kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
  249.         //  ctrl       ctrl       ctrl       ctrl       ctrl       ctrl       ctrl       ctrl
  250.             kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
  251.         //  ctrl       ctrl       ctrl       ctrl       ctrl       ctrl       ctrl       ctrl
  252.             kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
  253.         //  ctrl       ctrl       ctrl       ctrl       ctrl       ctrl       ctrl       ctrl
  254.             kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
  255.         //  ctrl       ctrl       ctrl       ctrl       ctrl       ctrl       ctrl       ctrl
  256.             kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
  257.         //  nbsp       ¡          ¢          £          ¤          ¥          ¦          §
  258.             kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
  259.         //  ¨          ©          ª          «          ¬          shy        ®          ¯
  260.             kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
  261.         //  °          ±          ²          ³          ´          µ          ¶          ·
  262.             kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
  263.         //  ¸          ¹          º          »          ¼          ½          ¾          ¿
  264.             kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
  265.         //  À          Á          Â          Ã          Ä          Å          Æ          Ç
  266.             kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
  267.         //  È          É          Ê          Ë          Ì          Í          Î          Ï
  268.             kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
  269.         //  Ð          Ñ          Ò          Ó          Ô          Õ          Ö          ×
  270.             kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
  271.         //  Ø          Ù          Ú          Û          Ü          Ý          Þ          ß
  272.             kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
  273.         //  à          á          â          ã          ä          å          æ          ç
  274.             kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
  275.         //  è          é          ê          ë          ì          í          î          ï
  276.             kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
  277.         //  ð          ñ          ò          ó          ô          õ          ö          ÷
  278.             kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
  279.         //  ø          ù          ú          û          ü          ý          þ          ÿ
  280.             kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm
  281. };
  282.  
  283.  
  284.  
  // Character-to-break-type mapping: heap-allocated during static initialization from
  // the four tables defined above. The raw mapping and the exception list are passed
  // with their lengths; the exception flags and the 256-entry direct table are passed
  // as bare arrays. No matching delete appears in this file.
  285. UnicodeClassMapping* TextBoundaryData::kCharacterMap = 
  286.     new UnicodeClassMapping(kCharacterRawMapping, kCharacterRawMapping_length, 
  287.                             kCharacterExceptionChar, kCharacterExceptionChar_length,
  288.                             kCharacterExceptionFlags,
  289.                             kCharacterAsciiValues );
  290.  
  291. /**
  292.  * This is the single instance of TextBoundaryData containing character
  293.  * break data.
        *
        * It is built from the forward table, the backward table and the character
        * map defined earlier in this file. Those three pointers are set by dynamic
        * initializers, which run in definition order within one translation unit,
        * so this definition must stay after them.
  294.  */
  295. const TextBoundaryData TextBoundaryData::kCharacterBreakData(TextBoundaryData::kCharacterForward,
  296.                                                              TextBoundaryData::kCharacterBackward,
  297.                                                              TextBoundaryData::kCharacterMap);
  298.  
  299. //eof
  300.