home *** CD-ROM | disk | FTP | other *** search
- /*
- *****************************************************************************************
- * *
- * COPYRIGHT: *
- * (C) Copyright Taligent, Inc., 1997 *
- * (C) Copyright International Business Machines Corporation, 1997-1998 *
- * Licensed Material - Program-Property of IBM - All Rights Reserved. *
- * US Government Users Restricted Rights - Use, duplication, or disclosure *
- * restricted by GSA ADP Schedule Contract with IBM Corp. *
- * *
- *****************************************************************************************
- *
- * File CHBKDAT.CPP
- *
- * Modification History:
- *
- * Date Name Description
- * 02/18/97 aliu Converted from OpenClass.
- * Recoded kRawMapping table for Unicode::getType() type codes.
- * Made static data members const where appropriate.
- * 03/25/97 aliu Moved into TextBoundaryData; no longer a subclass.
- * 04/15/97 aliu Worked around bug in AIX xlC compiler which occurs if static
- * arrays contain const elements.
- * 05/06/97 aliu Made SpecialMapping an array of objects instead of pointers,
- * to help out non-compliant compilers.
- * 08/14/98 helena Sync-up JDK1.2.
- * 07/12/99 helena HPUX 11 CC port.
- *****************************************************************************************
- */
-
- // *****************************************************************************
- // This file was generated from the java source file CharacterBreakData.java
- // *****************************************************************************
-
- #include "txtbdat.h"
- #include "wdbktbl.h"
- #include "unicdcm.h"
- // *****************************************************************************
- // class CharacterBreakData
- // The following tables contain the transition state data for character break.
- // Take forward data for example, the state machine looks like,
- // Diagram 1 : the forward state machine for accent and base
- //
- //                          accent
- //                           ----
- //            accent  +----+/    \
- //           -------> |SI+2|      |
- //          /         +----+<----/
- //    +----+            |      base         +-------+
- // 0->|stop|            +-----------------> |SI_stop|
- //    +----+\-------> +----+--------------> +-------+
- //            base    |SI+2|     base
- //                    +----+
- //                     ^    \
- //                     |    |
- //                     \----/
- //                     accent
- //
- // *****************************************************************************
- // The forward transition states of character boundary data: a flat array read as rows of eight entries, one row per state (rows 1-7 are labelled below; the unlabelled first row is row 0) and one entry per character category. The row notes assume kSI_n names row n; the Node constants are declared outside this file.
- TextBoundaryData::Node TextBoundaryData::kCharacterForwardData[] = {
- // first line of each row: acct base cr lf
- // second line of each row: cho jung jong EOS
- kStop, kStop, kStop, kStop, // row 0: every category is kStop
- kStop, kStop, kStop, kStop,
- 
- // 1: acct and base lead to row 2, cr to row 3, lf to row 7, cho/jung/jong to rows 4/5/6, EOS is kSI_Stop (presumably the start state; confirm against WordBreakTable)
- kSI_2, kSI_2, kSI_3, kSI_7,
- kSI_4, kSI_5, kSI_6, kSI_Stop,
- 
- // 2: reached on acct (or on base from row 1); a further acct stays in row 2, every other category is kSI_Stop
- kSI_2, kSI_Stop, kSI_Stop, kSI_Stop,
- kSI_Stop, kSI_Stop, kSI_Stop, kSI_Stop,
- 
- // 3: reached on cr; only a following lf continues (to row 7), presumably so that a CR LF pair is kept together
- kSI_Stop, kSI_Stop, kSI_Stop, kSI_7,
- kSI_Stop, kSI_Stop, kSI_Stop, kSI_Stop,
- 
- // 4: reached on cho; continues on acct (row 2), cho (row 4), jung (row 5) and jong (row 6)
- kSI_2, kSI_Stop, kSI_Stop, kSI_Stop,
- kSI_4, kSI_5, kSI_6, kSI_Stop,
- 
- // 5: reached on jung; continues on acct (row 2), jung (row 5) and jong (row 6), but no longer on cho
- kSI_2, kSI_Stop, kSI_Stop, kSI_Stop,
- kSI_Stop, kSI_5, kSI_6, kSI_Stop,
- 
- // 6: reached on jong; continues only on acct (row 2) and jong (row 6)
- kSI_2, kSI_Stop, kSI_Stop, kSI_Stop,
- kSI_Stop, kSI_Stop, kSI_6, kSI_Stop,
- 
- // 7: reached on lf; every category is kSI_Stop
- kSI_Stop, kSI_Stop, kSI_Stop, kSI_Stop,
- kSI_Stop, kSI_Stop, kSI_Stop, kSI_Stop
- };
-
- const int32_t TextBoundaryData::kCharacterForwardData_length = // number of Node entries in kCharacterForwardData
- sizeof(TextBoundaryData::kCharacterForwardData) / sizeof(TextBoundaryData::kCharacterForwardData[0]);
- 
- WordBreakTable* TextBoundaryData::kCharacterForward = new WordBreakTable(kCharacterCol_count, kCharacterForwardData, kCharacterForwardData_length); // forward table object, allocated with new in a static initializer; no delete for it appears in this file
-
-
- // *****************************************************************************
- //
- // Diagram 2 : the backward state machine for accent and base
- //
- //                          accent
- //                           ----
- //            accent  +----+/    \
- //           -------> |SI+1|      |
- //          /         +----+<----/
- //    +----+            |      base         +-------+
- // 0->|stop|            +-----------------> |SI_stop|
- //    +----+\-----------------------------> +-------+
- //                      base
- //
- // *****************************************************************************
- // The backward transition states of character boundary data: same layout as the forward table (rows of eight entries, one per character category), but with only row 0 and row 1.
- TextBoundaryData::Node TextBoundaryData::kCharacterBackwardData[] = {
- // first line of each row: acct base cr lf
- // second line of each row: cho jung jong EOS
- kStop, kStop, kStop, kStop, // row 0: every category is kStop
- kStop, kStop, kStop, kStop,
- 
- // 1: acct, lf, jung and jong give kSI_1 (presumably staying in row 1); base, cr, cho and EOS are kSI_Stop
- kSI_1, kSI_Stop, kSI_Stop, kSI_1,
- kSI_Stop, kSI_1, kSI_1, kSI_Stop
- };
-
- const int32_t TextBoundaryData::kCharacterBackwardData_length = // number of Node entries in kCharacterBackwardData
- sizeof(TextBoundaryData::kCharacterBackwardData) / sizeof(TextBoundaryData::kCharacterBackwardData[0]);
- 
- WordBreakTable* TextBoundaryData::kCharacterBackward = new WordBreakTable(kCharacterCol_count, kCharacterBackwardData, kCharacterBackwardData_length); // backward table object, allocated with new in a static initializer; no delete for it appears in this file
-
- // The character type mapping of the break table: entry i is the break-table type for the Unicode character type code i named in the trailing comment (0-28). Only the two mark types 6 and 7 map to kAccent_diacritic; every other code maps to kBaseForm.
- TextBoundaryData::Type TextBoundaryData::kCharacterRawMapping[] = {
- // Re-coded to match Unicode 2 types [LIU]
- kBaseForm, // UNASSIGNED = 0,
- kBaseForm, // UPPERCASE_LETTER = 1,
- kBaseForm, // LOWERCASE_LETTER = 2,
- kBaseForm, // TITLECASE_LETTER = 3,
- kBaseForm, // MODIFIER_LETTER = 4,
- kBaseForm, // OTHER_LETTER = 5,
- kAccent_diacritic, // NON_SPACING_MARK = 6,
- kAccent_diacritic, // ENCLOSING_MARK = 7,
- kBaseForm, // COMBINING_SPACING_MARK = 8, (mapped as a base, unlike the two mark types above)
- kBaseForm, // DECIMAL_DIGIT_NUMBER = 9,
- kBaseForm, // LETTER_NUMBER = 10,
- kBaseForm, // OTHER_NUMBER = 11,
- kBaseForm, // SPACE_SEPARATOR = 12,
- kBaseForm, // LINE_SEPARATOR = 13,
- kBaseForm, // PARAGRAPH_SEPARATOR = 14,
- kBaseForm, // CONTROL = 15,
- kBaseForm, // FORMAT = 16,
- kBaseForm, // PRIVATE_USE = 17,
- kBaseForm, // SURROGATE = 18,
- kBaseForm, // DASH_PUNCTUATION = 19,
- kBaseForm, // START_PUNCTUATION = 20,
- kBaseForm, // END_PUNCTUATION = 21,
- kBaseForm, // CONNECTOR_PUNCTUATION = 22,
- kBaseForm, // OTHER_PUNCTUATION = 23,
- kBaseForm, // MATH_SYMBOL = 24,
- kBaseForm, // CURRENCY_SYMBOL = 25,
- kBaseForm, // MODIFIER_SYMBOL = 26,
- kBaseForm, // OTHER_SYMBOL = 27,
- kBaseForm // UNDEFINED = 28
- };
-
- const int32_t TextBoundaryData::kCharacterRawMapping_length = // number of entries in kCharacterRawMapping (29 in the initializer in this file)
- sizeof(TextBoundaryData::kCharacterRawMapping) / sizeof(TextBoundaryData::kCharacterRawMapping[0]);
-
- SpecialMapping TextBoundaryData::kCharacterExceptionChar[] = { // characters given an explicit type here rather than the per-category type of kCharacterRawMapping (presumably; the lookup itself lives in UnicodeClassMapping)
- SpecialMapping(TextBoundaryData::ASCII_LINEFEED, TextBoundaryData::kBaseLF), // two-argument form: one character and its type
- SpecialMapping(TextBoundaryData::ASCII_CARRIAGE_RETURN, TextBoundaryData::kBaseCR),
- SpecialMapping(TextBoundaryData::HANGUL_CHOSEONG_LOW, TextBoundaryData::HANGUL_CHOSEONG_HIGH, TextBoundaryData::kChoseong), // three-argument form: presumably a LOW..HIGH range sharing one type
- SpecialMapping(TextBoundaryData::HANGUL_JUNGSEONG_LOW, TextBoundaryData::HANGUL_JUNGSEONG_HIGH, TextBoundaryData::kJungseong),
- SpecialMapping(TextBoundaryData::HANGUL_JONGSEONG_LOW, TextBoundaryData::HANGUL_JONGSEONG_HIGH, TextBoundaryData::kJongseong),
- SpecialMapping(TextBoundaryData::PUNCTUATION_LINE_SEPARATOR, TextBoundaryData::PUNCTUATION_PARAGRAPH_SEPARATOR, TextBoundaryData::kBaseLF), // line and paragraph separators get the same type as a linefeed
- SpecialMapping(TextBoundaryData::END_OF_STRING, TextBoundaryData::kEOS)
- };
-
- const int32_t TextBoundaryData::kCharacterExceptionChar_length = // number of SpecialMapping entries in kCharacterExceptionChar
- sizeof(TextBoundaryData::kCharacterExceptionChar) / sizeof(TextBoundaryData::kCharacterExceptionChar[0]);
-
- const bool_t TextBoundaryData::kCharacterExceptionFlags[] = { // one flag per character type code 0-28, in the same order as kCharacterRawMapping; TRUE presumably marks types that contain a kCharacterExceptionChar entry (confirm against UnicodeClassMapping)
- FALSE, // kNonCharacter = 0,
- FALSE, // kUppercaseLetter = 1,
- FALSE, // kLowercaseLetter = 2,
- FALSE, // kTitlecaseLetter = 3,
- FALSE, // kModifierLetter = 4,
- TRUE, // kOtherLetter = 5, (presumably for the Hangul choseong/jungseong/jongseong ranges)
- FALSE, // kNonSpacingMark = 6,
- FALSE, // kEnclosingMark = 7,
- FALSE, // kCombiningSpacingMark = 8,
- FALSE, // kDecimalNumber = 9,
- FALSE, // kLetterNumber = 10,
- FALSE, // kOtherNumber = 11,
- FALSE, // kSpaceSeparator = 12,
- TRUE, // kLineSeparator = 13, (presumably for PUNCTUATION_LINE_SEPARATOR)
- TRUE, // kParagraphSeparator = 14, (presumably for PUNCTUATION_PARAGRAPH_SEPARATOR)
- TRUE, // kControlCharacter = 15, (presumably for ASCII_LINEFEED and ASCII_CARRIAGE_RETURN)
- FALSE, // kFormatCharacter = 16,
- FALSE, // kPrivateUseCharacter = 17,
- FALSE, // kSurrogate = 18,
- FALSE, // kDashPunctuation = 19,
- FALSE, // kOpenPunctuation = 20,
- FALSE, // kClosePunctuation = 21,
- FALSE, // kConnectorPunctuation = 22,
- FALSE, // kOtherPunctuation = 23,
- FALSE, // kMathSymbol = 24,
- FALSE, // kCurrencySymbol = 25,
- FALSE, // kModifierSymbol = 26,
- FALSE, // kOtherSymbol = 27,
- FALSE // UNDEFINED = 28
- };
-
- TextBoundaryData::Type TextBoundaryData::kCharacterAsciiValues[] = { // 32 rows of 8 = 256 entries, one per code point 0x00-0xFF (despite the name, this covers more than 7-bit ASCII); only lf and cr differ from kBaseForm
- // null soh stx etx eot enq ack bell
- kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
- // bs ht lf vt ff cr so si
- kBaseForm, kBaseForm, kBaseLF, kBaseForm, kBaseForm, kBaseCR, kBaseForm, kBaseForm,
- // dle dc1 dc2 dc3 dc4 nak syn etb
- kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
- // can em sub esc fs gs rs us
- kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
- // sp ! " # $ % & '
- kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
- // ( ) * + , - . /
- kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
- // 0 1 2 3 4 5 6 7
- kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
- // 8 9 : ; < = > ?
- kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
- // @ A B C D E F G
- kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
- // H I J K L M N O
- kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
- // P Q R S T U V W
- kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
- // X Y Z [ \ ] ^ _
- kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
- // ` a b c d e f g
- kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
- // h i j k l m n o
- kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
- // p q r s t u v w
- kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
- // x y z { | } ~ del
- kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
- // ctrl ctrl ctrl ctrl ctrl ctrl ctrl ctrl (0x80-0x87; the four ctrl rows cover 0x80-0x9F)
- kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
- // ctrl ctrl ctrl ctrl ctrl ctrl ctrl ctrl
- kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
- // ctrl ctrl ctrl ctrl ctrl ctrl ctrl ctrl
- kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
- // ctrl ctrl ctrl ctrl ctrl ctrl ctrl ctrl
- kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
- // nbsp ¡ ¢ £ ¤ ¥ ¦ §
- kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
- // ¨ © ª « ¬ shy ® ¯
- kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
- // ° ± ² ³ ´ µ ¶ ·
- kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
- // ¸ ¹ º » ¼ ½ ¾ ¿
- kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
- // À Á Â Ã Ä Å Æ Ç
- kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
- // È É Ê Ë Ì Í Î Ï
- kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
- // Ð Ñ Ò Ó Ô Õ Ö ×
- kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
- // Ø Ù Ú Û Ü Ý Þ ß
- kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
- // à á â ã ä å æ ç
- kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
- // è é ê ë ì í î ï
- kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
- // ð ñ ò ó ô õ ö ÷
- kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm,
- // ø ù ú û ü ý þ ÿ
- kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm
- };
-
-
-
- UnicodeClassMapping* TextBoundaryData::kCharacterMap = // character-to-type mapping object built from the four tables defined in this file; allocated with new in a static initializer, and no delete for it appears in this file
- new UnicodeClassMapping(kCharacterRawMapping, kCharacterRawMapping_length,
- kCharacterExceptionChar, kCharacterExceptionChar_length,
- kCharacterExceptionFlags, // passed without a length; its initializer has 29 entries, the same count as kCharacterRawMapping
- kCharacterAsciiValues ); // passed without a length; its initializer has 256 entries
-
- /**
- * This is the single instance of TextBoundaryData containing character
- * break data. It is constructed from the forward state table, the backward
- * state table and the character-to-type map, in that order; all three
- * pointers are static members defined earlier in this same file.
- */
- const TextBoundaryData TextBoundaryData::kCharacterBreakData(TextBoundaryData::kCharacterForward,
- TextBoundaryData::kCharacterBackward,
- TextBoundaryData::kCharacterMap);
-
- //eof
-