home *** CD-ROM | disk | FTP | other *** search
/ AmigActive 6 / AACD06.ISO / AACD / Programming / ICU / src / icu / source / i18n / txtbdat.cpp < prev    next >
Encoding:
C/C++ Source or Header  |  1999-08-16  |  11.7 KB  |  162 lines

  1. /*
  2. *****************************************************************************************
  3. *                                                                                       *
  4. * COPYRIGHT:                                                                            *
  5. *   (C) Copyright Taligent, Inc.,  1997                                                 *
  6. *   (C) Copyright International Business Machines Corporation,  1997-1998                    *
  7. *   Licensed Material - Program-Property of IBM - All Rights Reserved.                  *
  8. *   US Government Users Restricted Rights - Use, duplication, or disclosure             *
  9. *   restricted by GSA ADP Schedule Contract with IBM Corp.                              *
  10. *                                                                                       *
  11. *****************************************************************************************
  12. *
  13. * File TXTBDAT.CPP
  14. *
  15. * Modification History:
  16. *
  17. *   Date        Name        Description
  18. *   02/18/97    aliu        Converted from OpenClass.
  19. *                           Made static data members const where appropriate.
  20. *   05/06/97    aliu        Made kSI, kStop, and kSI_Stop into #defines to help out
  21. *                           non-compliant compilers.
  22. *****************************************************************************************
  23. */
  24.  
  25. #include "txtbdat.h"
  26.  
  27. // *****************************************************************************
  28. // class TextBoundaryData
  29. // *****************************************************************************
  30.  
  31. // The following definitions were removed and replaced by #defines because of compiler problems.
  32. //const TextBoundaryData::Node TextBoundaryData::kSI        = 0x80;
  33. //const TextBoundaryData::Node TextBoundaryData::kStop      = 0;
  34. //const TextBoundaryData::Node TextBoundaryData::kSI_Stop   = kSI + kStop;
  35.  
  36. // The following Unicode characters may need special mappings in a particular
  37. // text boundary.
  //
  // Control characters, ASCII punctuation and Latin-1 symbols (0x0003..0x00B0).
  // NOTE: the ASCII_-prefixed constants with values 0x00A0..0x00A5 lie above
  // 0x007F, i.e. outside the 7-bit ASCII range, despite their names.
  38. const UChar TextBoundaryData::ASCII_END_OF_TEXT                   = (UChar)0x0003;
  39. const UChar TextBoundaryData::ASCII_HORIZONTAL_TABULATION         = (UChar)0x0009;
  40. const UChar TextBoundaryData::ASCII_LINEFEED                      = (UChar)0x000A;
  41. const UChar TextBoundaryData::ASCII_VERTICAL_TABULATION           = (UChar)0x000B;
  42. const UChar TextBoundaryData::ASCII_FORM_FEED                     = (UChar)0x000C;
  43. const UChar TextBoundaryData::ASCII_CARRIAGE_RETURN               = (UChar)0x000D;
  44. const UChar TextBoundaryData::ASCII_SPACE                         = (UChar)0x0020;
  45. const UChar TextBoundaryData::ASCII_EXCLAMATION_MARK              = (UChar)0x0021;
  46. const UChar TextBoundaryData::ASCII_QUOTATION_MARK                = (UChar)0x0022;
  47. const UChar TextBoundaryData::ASCII_NUMBER_SIGN                   = (UChar)0x0023;
  48. const UChar TextBoundaryData::ASCII_DOLLAR_SIGN                   = (UChar)0x0024;
  49. const UChar TextBoundaryData::ASCII_PERCENT                       = (UChar)0x0025;
  50. const UChar TextBoundaryData::ASCII_AMPERSAND                     = (UChar)0x0026;
  51. const UChar TextBoundaryData::ASCII_APOSTROPHE                    = (UChar)0x0027;
  52. const UChar TextBoundaryData::ASCII_COMMA                         = (UChar)0x002C;
  53. const UChar TextBoundaryData::ASCII_FULL_STOP                     = (UChar)0x002E;
  54. const UChar TextBoundaryData::ASCII_COLON                         = (UChar)0x003A;
  55. const UChar TextBoundaryData::ASCII_SEMICOLON                     = (UChar)0x003B;
  56. const UChar TextBoundaryData::ASCII_QUESTION_MARK                 = (UChar)0x003F;
  57. const UChar TextBoundaryData::ASCII_NONBREAKING_SPACE             = (UChar)0x00A0;
  58. const UChar TextBoundaryData::ASCII_CENT_SIGN                     = (UChar)0x00A2;
  59. const UChar TextBoundaryData::ASCII_POUND_SIGN                    = (UChar)0x00A3;
  60. const UChar TextBoundaryData::ASCII_YEN_SIGN                      = (UChar)0x00A5;
  61. const UChar TextBoundaryData::LATIN1_SOFTHYPHEN                   = (UChar)0x00AD;
  62. const UChar TextBoundaryData::LATIN1_DEGREE_SIGN                  = (UChar)0x00B0;
  // Arabic numeric punctuation, followed by LOW/HIGH bounds for the three
  // Hangul jamo classes (choseong, jungseong, jongseong). The three jamo
  // ranges are contiguous and together span 0x1100..0x11FF.
  // NOTE(review): the bounds are presumably inclusive -- confirm against the
  // code that consumes them.
  63. const UChar TextBoundaryData::ARABIC_PERCENT_SIGN                 = (UChar)0x066A;
  64. const UChar TextBoundaryData::ARABIC_DECIMAL_SEPARATOR            = (UChar)0x066B;
  65. const UChar TextBoundaryData::HANGUL_CHOSEONG_LOW                 = (UChar)0x1100;
  66. const UChar TextBoundaryData::HANGUL_CHOSEONG_HIGH                = (UChar)0x115F;
  67. const UChar TextBoundaryData::HANGUL_JUNGSEONG_LOW                = (UChar)0x1160;
  68. const UChar TextBoundaryData::HANGUL_JUNGSEONG_HIGH               = (UChar)0x11A7;
  69. const UChar TextBoundaryData::HANGUL_JONGSEONG_LOW                = (UChar)0x11A8;
  70. const UChar TextBoundaryData::HANGUL_JONGSEONG_HIGH               = (UChar)0x11FF;
  // Special spaces, hyphens, line/paragraph separators, per-mille and prime
  // signs, and temperature symbols (0x2007..0x2109), followed by ideographic
  // punctuation and the ideographic iteration mark (0x3001..0x3005).
  71. const UChar TextBoundaryData::FIGURE_SPACE                        = (UChar)0x2007;
  72. const UChar TextBoundaryData::NONBREAKING_HYPHEN                  = (UChar)0x2011;
  73. const UChar TextBoundaryData::PUNCTUATION_HYPHENATION_POINT       = (UChar)0x2027;
  74. const UChar TextBoundaryData::PUNCTUATION_LINE_SEPARATOR          = (UChar)0x2028;
  75. const UChar TextBoundaryData::PUNCTUATION_PARAGRAPH_SEPARATOR     = (UChar)0x2029;
  76. const UChar TextBoundaryData::PER_MILLE_SIGN                      = (UChar)0x2030;
  77. const UChar TextBoundaryData::PER_TEN_THOUSAND_SIGN               = (UChar)0x2031;
  78. const UChar TextBoundaryData::PRIME                               = (UChar)0x2032;
  79. const UChar TextBoundaryData::DOUBLE_PRIME                        = (UChar)0x2033;
  80. const UChar TextBoundaryData::TRIPLE_PRIME                        = (UChar)0x2034;
  81. const UChar TextBoundaryData::DEGREE_CELSIUS                      = (UChar)0x2103;
  82. const UChar TextBoundaryData::DEGREE_FAHRENHEIT                   = (UChar)0x2109;
  83. const UChar TextBoundaryData::PUNCTUATION_IDEOGRAPHIC_COMMA       = (UChar)0x3001;
  84. const UChar TextBoundaryData::PUNCTUATION_IDEOGRAPHIC_FULL_STOP   = (UChar)0x3002;
  85. const UChar TextBoundaryData::IDEOGRAPHIC_ITERATION_MARK          = (UChar)0x3005;
  // Hiragana letters and marks (0x3041..0x309E). Wherever both a SMALL and a
  // regular form of a letter are listed, the small form is at value N and
  // the regular form at N + 1. The sound marks and iteration marks follow
  // the letters.
  86. const UChar TextBoundaryData::HIRAGANA_LETTER_SMALL_A             = (UChar)0x3041;
  87. const UChar TextBoundaryData::HIRAGANA_LETTER_A                   = (UChar)0x3042;
  88. const UChar TextBoundaryData::HIRAGANA_LETTER_SMALL_I             = (UChar)0x3043;
  89. const UChar TextBoundaryData::HIRAGANA_LETTER_I                   = (UChar)0x3044;
  90. const UChar TextBoundaryData::HIRAGANA_LETTER_SMALL_U             = (UChar)0x3045;
  91. const UChar TextBoundaryData::HIRAGANA_LETTER_U                   = (UChar)0x3046;
  92. const UChar TextBoundaryData::HIRAGANA_LETTER_SMALL_E             = (UChar)0x3047;
  93. const UChar TextBoundaryData::HIRAGANA_LETTER_E                   = (UChar)0x3048;
  94. const UChar TextBoundaryData::HIRAGANA_LETTER_SMALL_O             = (UChar)0x3049;
  95. const UChar TextBoundaryData::HIRAGANA_LETTER_O                   = (UChar)0x304A;
  96. const UChar TextBoundaryData::HIRAGANA_LETTER_DI                  = (UChar)0x3062;
  97. const UChar TextBoundaryData::HIRAGANA_LETTER_SMALL_TU            = (UChar)0x3063;
  98. const UChar TextBoundaryData::HIRAGANA_LETTER_TU                  = (UChar)0x3064;
  99. const UChar TextBoundaryData::HIRAGANA_LETTER_MO                  = (UChar)0x3082;
  100. const UChar TextBoundaryData::HIRAGANA_LETTER_SMALL_YA            = (UChar)0x3083;
  101. const UChar TextBoundaryData::HIRAGANA_LETTER_YA                  = (UChar)0x3084;
  102. const UChar TextBoundaryData::HIRAGANA_LETTER_SMALL_YU            = (UChar)0x3085;
  103. const UChar TextBoundaryData::HIRAGANA_LETTER_YU                  = (UChar)0x3086;
  104. const UChar TextBoundaryData::HIRAGANA_LETTER_SMALL_YO            = (UChar)0x3087;
  105. const UChar TextBoundaryData::HIRAGANA_LETTER_YO                  = (UChar)0x3088;
  106. const UChar TextBoundaryData::HIRAGANA_LETTER_RO                  = (UChar)0x308D;
  107. const UChar TextBoundaryData::HIRAGANA_LETTER_SMALL_WA            = (UChar)0x308E;
  108. const UChar TextBoundaryData::HIRAGANA_LETTER_WA                  = (UChar)0x308F;
  109. const UChar TextBoundaryData::HIRAGANA_LETTER_VU                  = (UChar)0x3094;
  110. const UChar TextBoundaryData::COMBINING_KATAKANA_HIRAGANA_VOICED_SOUND_MARK = (UChar)0x3099;
  111. const UChar TextBoundaryData::HIRAGANA_SEMIVOICED_SOUND_MARK      = (UChar)0x309C;
  112. const UChar TextBoundaryData::HIRAGANA_ITERATION_MARK             = (UChar)0x309D;
  113. const UChar TextBoundaryData::HIRAGANA_VOICED_ITERATION_MARK      = (UChar)0x309E;
  // Katakana letters and marks (0x30A1..0x30FE). Wherever both a SMALL and a
  // regular form of a letter are listed, the small form is at value N and
  // the regular form at N + 1. SMALL_KA and SMALL_KE have no regular
  // counterpart defined in this list. The prolonged sound mark and iteration
  // marks follow the letters.
  114. const UChar TextBoundaryData::KATAKANA_LETTER_SMALL_A             = (UChar)0x30A1;
  115. const UChar TextBoundaryData::KATAKANA_LETTER_A                   = (UChar)0x30A2;
  116. const UChar TextBoundaryData::KATAKANA_LETTER_SMALL_I             = (UChar)0x30A3;
  117. const UChar TextBoundaryData::KATAKANA_LETTER_I                   = (UChar)0x30A4;
  118. const UChar TextBoundaryData::KATAKANA_LETTER_SMALL_U             = (UChar)0x30A5;
  119. const UChar TextBoundaryData::KATAKANA_LETTER_U                   = (UChar)0x30A6;
  120. const UChar TextBoundaryData::KATAKANA_LETTER_SMALL_E             = (UChar)0x30A7;
  121. const UChar TextBoundaryData::KATAKANA_LETTER_E                   = (UChar)0x30A8;
  122. const UChar TextBoundaryData::KATAKANA_LETTER_SMALL_O             = (UChar)0x30A9;
  123. const UChar TextBoundaryData::KATAKANA_LETTER_O                   = (UChar)0x30AA;
  124. const UChar TextBoundaryData::KATAKANA_LETTER_DI                  = (UChar)0x30C2;
  125. const UChar TextBoundaryData::KATAKANA_LETTER_SMALL_TU            = (UChar)0x30C3;
  126. const UChar TextBoundaryData::KATAKANA_LETTER_TU                  = (UChar)0x30C4;
  127. const UChar TextBoundaryData::KATAKANA_LETTER_MO                  = (UChar)0x30E2;
  128. const UChar TextBoundaryData::KATAKANA_LETTER_SMALL_YA            = (UChar)0x30E3;
  129. const UChar TextBoundaryData::KATAKANA_LETTER_YA                  = (UChar)0x30E4;
  130. const UChar TextBoundaryData::KATAKANA_LETTER_SMALL_YU            = (UChar)0x30E5;
  131. const UChar TextBoundaryData::KATAKANA_LETTER_YU                  = (UChar)0x30E6;
  132. const UChar TextBoundaryData::KATAKANA_LETTER_SMALL_YO            = (UChar)0x30E7;
  133. const UChar TextBoundaryData::KATAKANA_LETTER_YO                  = (UChar)0x30E8;
  134. const UChar TextBoundaryData::KATAKANA_LETTER_RO                  = (UChar)0x30ED;
  135. const UChar TextBoundaryData::KATAKANA_LETTER_SMALL_WA            = (UChar)0x30EE;
  136. const UChar TextBoundaryData::KATAKANA_LETTER_WA                  = (UChar)0x30EF;
  137. const UChar TextBoundaryData::KATAKANA_LETTER_VU                  = (UChar)0x30F4;
  138. const UChar TextBoundaryData::KATAKANA_LETTER_SMALL_KA            = (UChar)0x30F5;
  139. const UChar TextBoundaryData::KATAKANA_LETTER_SMALL_KE            = (UChar)0x30F6;
  140. const UChar TextBoundaryData::KATAKANA_LETTER_VA                  = (UChar)0x30F7;
  141. const UChar TextBoundaryData::KATAKANA_LETTER_VO                  = (UChar)0x30FA;
  142. const UChar TextBoundaryData::KATAKANA_HIRAGANA_PROLONGED_SOUND_MARK = (UChar)0x30FC;
  143. const UChar TextBoundaryData::KATAKANA_ITERATION_MARK             = (UChar)0x30FD;
  144. const UChar TextBoundaryData::KATAKANA_VOICED_ITERATION_MARK      = (UChar)0x30FE;
  // Range bounds for Han ideographs (0x4E00..0x9FA5) and Hangul syllables
  // (0xAC00..0xD7A3), two CJK compatibility code points (0xF900, 0xFA2D),
  // the zero-width non-breaking space (0xFEFF), and fullwidth
  // sentence-ending punctuation.
  // NOTE(review): CJK_COMPATIBILITY_F900 / _FA2D presumably act as the low
  // and high bounds of a range -- confirm against the code that uses them.
  145. const UChar TextBoundaryData::UNICODE_LOW_BOUND_HAN               = (UChar)0x4E00;
  146. const UChar TextBoundaryData::UNICODE_HIGH_BOUND_HAN              = (UChar)0x9FA5;
  147. const UChar TextBoundaryData::HANGUL_SYL_LOW                      = (UChar)0xAC00;
  148. const UChar TextBoundaryData::HANGUL_SYL_HIGH                     = (UChar)0xD7A3;
  149. const UChar TextBoundaryData::CJK_COMPATIBILITY_F900              = (UChar)0xF900;
  150. const UChar TextBoundaryData::CJK_COMPATIBILITY_FA2D              = (UChar)0xFA2D;
  151. const UChar TextBoundaryData::UNICODE_ZERO_WIDTH_NON_BREAKING_SPACE = (UChar)0xFEFF;
  152. const UChar TextBoundaryData::FULLWIDTH_EXCLAMATION_MARK          = (UChar)0xFF01;
  153. const UChar TextBoundaryData::FULLWIDTH_FULL_STOP                 = (UChar)0xFF0E;
  154. const UChar TextBoundaryData::FULLWIDTH_QUESTION_MARK             = (UChar)0xFF1F;
  155.  
  156.     // SimpleTextBoundary has an internal convention that the not-a-Unicode value
  157.     // $FFFF is used to signify the end of the string when looking up the proper
  158.     // state transition for the end of the string.
  159. const UChar TextBoundaryData::END_OF_STRING                       = (UChar)0xFFFF;
  160.  
  161. //eof
  162.