home *** CD-ROM | disk | FTP | other *** search
-
- /*
- ********************************************************************
- * COPYRIGHT:
- * (C) Copyright Taligent, Inc., 1997
- * (C) Copyright International Business Machines Corporation, 1997 - 1998
- * Licensed Material - Program-Property of IBM - All Rights Reserved.
- * US Government Users Restricted Rights - Use, duplication, or disclosure
- * restricted by GSA ADP Schedule Contract with IBM Corp.
- *
- ********************************************************************
- */
-
- #ifndef _COLL
- #include "coll.h"
- #endif
-
- #ifndef _TBLCOLL
- #include "tblcoll.h"
- #endif
-
- #ifndef _UNISTR
- #include "unistr.h"
- #endif
-
- #ifndef _SORTKEY
- #include "sortkey.h"
- #endif
-
- #ifndef _ALLCOLL
- #include "allcoll.h"
- #endif
-
- static const UChar DEFAULTRULEARRAY[] =
- {
- '=', '\'', (UChar)0x200B, '\'', '=', (UChar)0x200C, '=', (UChar)0x200D, '=', (UChar)0x200E, '=', (UChar)0x200F
- , '=', (UChar)0x0001, '=', (UChar)0x0002, '=', (UChar)0x0003, '=', (UChar)0x0004
- , '=', (UChar)0x0005, '=', (UChar)0x0006, '=', (UChar)0x0007, '=', (UChar)0x0008, '=', '\'', (UChar)0x0009, '\''
- , '=', '\'', (UChar)0x000b, '\'', '=', (UChar)0x000e //vt,, so
- , '=', (UChar)0x000f, '=', '\'', (UChar)0x0010, '\'', '=', (UChar)0x0011, '=', (UChar)0x0012, '=', (UChar)0x0013 //si, dle, dc1, dc2, dc3
- , '=', (UChar)0x0014, '=', (UChar)0x0015, '=', (UChar)0x0016, '=', (UChar)0x0017, '=', (UChar)0x0018 //dc4, nak, syn, etb, can
- , '=', (UChar)0x0019, '=', (UChar)0x001a, '=', (UChar)0x001b, '=', (UChar)0x001c, '=', (UChar)0x001d //em, sub, esc, fs, gs
- , '=', (UChar)0x001e, '=', (UChar)0x001f, '=', (UChar)0x007f //rs, us, del
- //....then the C1 Latin 1 reserved control codes
- , '=', (UChar)0x0080, '=', (UChar)0x0081, '=', (UChar)0x0082, '=', (UChar)0x0083, '=', (UChar)0x0084, '=', (UChar)0x0085
- , '=', (UChar)0x0086, '=', (UChar)0x0087, '=', (UChar)0x0088, '=', (UChar)0x0089, '=', (UChar)0x008a, '=', (UChar)0x008b
- , '=', (UChar)0x008c, '=', (UChar)0x008d, '=', (UChar)0x008e, '=', (UChar)0x008f, '=', (UChar)0x0090, '=', (UChar)0x0091
- , '=', (UChar)0x0092, '=', (UChar)0x0093, '=', (UChar)0x0094, '=', (UChar)0x0095, '=', (UChar)0x0096, '=', (UChar)0x0097
- , '=', (UChar)0x0098, '=', (UChar)0x0099, '=', (UChar)0x009a, '=', (UChar)0x009b, '=', (UChar)0x009c, '=', (UChar)0x009d
- , '=', (UChar)0x009e, '=', (UChar)0x009f
- // IGNORE except for secondary, tertiary difference
- // Spaces
- , ';', '\'', (UChar)0x0020, '\'', ';', '\'', (UChar)0x00A0, '\'' // spaces
- , ';', '\'', (UChar)0x2000, '\'', ';', '\'', (UChar)0x2001, '\'', ';', '\'', (UChar)0x2002, '\'', ';', '\'', (UChar)0x2003, '\'', ';', '\'', (UChar)0x2004, '\'' // spaces
- , ';', '\'', (UChar)0x2005, '\'', ';', '\'', (UChar)0x2006, '\'', ';', '\'', (UChar)0x2007, '\'', ';', '\'', (UChar)0x2008, '\'', ';', '\'', (UChar)0x2009, '\'' // spaces
- , ';', '\'', (UChar)0x200A, '\'', ';', '\'', (UChar)0x3000, '\'', ';', '\'', (UChar)0xFEFF, '\'' // spaces
- , ';', '\'', '\r', '\'', ';', '\'', '\t', '\'', ';', '\'', '\n', '\'', ';', '\'', '\f', '\'', ';', '\'', (UChar)0x000b, '\'' // whitespace
-
- // Non-spacing accents
-
- , ';', (UChar)0x0301 // non-spacing acute accent
- , ';', (UChar)0x0300 // non-spacing grave accent
- , ';', (UChar)0x0306 // non-spacing breve accent
- , ';', (UChar)0x0302 // non-spacing circumflex accent
- , ';', (UChar)0x030c // non-spacing caron/hacek accent
- , ';', (UChar)0x030a // non-spacing ring above accent
- , ';', (UChar)0x030d // non-spacing vertical line above
- , ';', (UChar)0x0308 // non-spacing diaeresis accent
- , ';', (UChar)0x030b // non-spacing double acute accent
- , ';', (UChar)0x0303 // non-spacing tilde accent
- , ';', (UChar)0x0307 // non-spacing dot above/overdot accent
- , ';', (UChar)0x0304 // non-spacing macron accent
- , ';', (UChar)0x0337 // non-spacing short slash overlay (overstruck diacritic)
- , ';', (UChar)0x0327 // non-spacing cedilla accent
- , ';', (UChar)0x0328 // non-spacing ogonek accent
- , ';', (UChar)0x0323 // non-spacing dot-below/underdot accent
- , ';', (UChar)0x0332 // non-spacing underscore/underline accent
- // with the rest of the general diacritical marks in binary order
- , ';', (UChar)0x0305 // non-spacing overscore/overline
- , ';', (UChar)0x0309 // non-spacing hook above
- , ';', (UChar)0x030e // non-spacing double vertical line above
- , ';', (UChar)0x030f // non-spacing double grave
- , ';', (UChar)0x0310 // non-spacing chandrabindu
- , ';', (UChar)0x0311 // non-spacing inverted breve
- , ';', (UChar)0x0312 // non-spacing turned comma above/cedilla above
- , ';', (UChar)0x0313 // non-spacing comma above
- , ';', (UChar)0x0314 // non-spacing reversed comma above
- , ';', (UChar)0x0315 // non-spacing comma above right
- , ';', (UChar)0x0316 // non-spacing grave below
- , ';', (UChar)0x0317 // non-spacing acute below
- , ';', (UChar)0x0318 // non-spacing left tack below
- , ';', (UChar)0x0319 // non-spacing tack below
- , ';', (UChar)0x031a // non-spacing left angle above
- , ';', (UChar)0x031b // non-spacing horn
- , ';', (UChar)0x031c // non-spacing left half ring below
- , ';', (UChar)0x031d // non-spacing up tack below
- , ';', (UChar)0x031e // non-spacing down tack below
- , ';', (UChar)0x031f // non-spacing plus sign below
- , ';', (UChar)0x0320 // non-spacing minus sign below
- , ';', (UChar)0x0321 // non-spacing palatalized hook below
- , ';', (UChar)0x0322 // non-spacing retroflex hook below
- , ';', (UChar)0x0324 // non-spacing double dot below
- , ';', (UChar)0x0325 // non-spacing ring below
- , ';', (UChar)0x0326 // non-spacing comma below
- , ';', (UChar)0x0329 // non-spacing vertical line below
- , ';', (UChar)0x032a // non-spacing bridge below
- , ';', (UChar)0x032b // non-spacing inverted double arch below
- , ';', (UChar)0x032c // non-spacing hacek below
- , ';', (UChar)0x032d // non-spacing circumflex below
- , ';', (UChar)0x032e // non-spacing breve below
- , ';', (UChar)0x032f // non-spacing inverted breve below
- , ';', (UChar)0x0330 // non-spacing tilde below
- , ';', (UChar)0x0331 // non-spacing macron below
- , ';', (UChar)0x0333 // non-spacing double underscore
- , ';', (UChar)0x0334 // non-spacing tilde overlay
- , ';', (UChar)0x0335 // non-spacing short bar overlay
- , ';', (UChar)0x0336 // non-spacing long bar overlay
- , ';', (UChar)0x0338 // non-spacing long slash overlay
- , ';', (UChar)0x0339 // non-spacing right half ring below
- , ';', (UChar)0x033a // non-spacing inverted bridge below
- , ';', (UChar)0x033b // non-spacing square below
- , ';', (UChar)0x033c // non-spacing seagull below
- , ';', (UChar)0x033d // non-spacing x above
- , ';', (UChar)0x033e // non-spacing vertical tilde
- , ';', (UChar)0x033f // non-spacing double overscore
- , ';', (UChar)0x0340 // non-spacing grave tone mark
- , ';', (UChar)0x0341 // non-spacing acute tone mark
- , ';', (UChar)0x0342, ';', (UChar)0x0343, ';', (UChar)0x0344, ';', (UChar)0x0345, ';', (UChar)0x0360, ';', (UChar)0x0361 // newer
- , ';', (UChar)0x0483, ';', (UChar)0x0484, ';', (UChar)0x0485, ';', (UChar)0x0486 // Cyrillic accents
-
- , ';', (UChar)0x20D0, ';', (UChar)0x20D1, ';', (UChar)0x20D2 // symbol accents
- , ';', (UChar)0x20D3, ';', (UChar)0x20D4, ';', (UChar)0x20D5 // symbol accents
- , ';', (UChar)0x20D6, ';', (UChar)0x20D7, ';', (UChar)0x20D8 // symbol accents
- , ';', (UChar)0x20D9, ';', (UChar)0x20DA, ';', (UChar)0x20DB // symbol accents
- , ';', (UChar)0x20DC, ';', (UChar)0x20DD, ';', (UChar)0x20DE // symbol accents
- , ';', (UChar)0x20DF, ';', (UChar)0x20E0, ';', (UChar)0x20E1 // symbol accents
-
- , ',', '\'', (UChar)0x002D, '\'', ';', (UChar)0x00AD // dashes
- , ';', (UChar)0x2010, ';', (UChar)0x2011, ';', (UChar)0x2012 // dashes
- , ';', (UChar)0x2013, ';', (UChar)0x2014, ';', (UChar)0x2015 // dashes
- , ';', (UChar)0x2212 // dashes
-
- // other punctuation
-
- , '<', '\'', (UChar)0x005f, '\'' // underline/underscore (spacing)
- , '<', (UChar)0x00af // overline or macron (spacing)
- // , '<', (UChar)0x00ad // syllable hyphen (SHY) or soft hyphen
- , '<', '\'', (UChar)0x002c, '\'' // comma (spacing)
- , '<', '\'', (UChar)0x003b, '\'' // semicolon
- , '<', '\'', (UChar)0x003a, '\'' // colon
- , '<', '\'', (UChar)0x0021, '\'' // exclamation point
- , '<', (UChar)0x00a1 // inverted exclamation point
- , '<', '\'', (UChar)0x003f, '\'' // question mark
- , '<', (UChar)0x00bf // inverted question mark
- , '<', '\'', (UChar)0x002f, '\'' // slash
- , '<', '\'', (UChar)0x002e, '\'' // period/full stop
- , '<', (UChar)0x00b4 // acute accent (spacing)
- , '<', '\'', (UChar)0x0060, '\'' // grave accent (spacing)
- , '<', '\'', (UChar)0x005e, '\'' // circumflex accent (spacing)
- , '<', (UChar)0x00a8 // diaresis/umlaut accent (spacing)
- , '<', '\'', (UChar)0x007e, '\'' // tilde accent (spacing)
- , '<', (UChar)0x00b7 // middle dot (spacing)
- , '<', (UChar)0x00b8 // cedilla accent (spacing)
- , '<', '\'', (UChar)0x0027, '\'' // apostrophe
- , '<', '\'', '"', '\'' // quotation marks
- , '<', (UChar)0x00ab // left angle quotes
- , '<', (UChar)0x00bb // right angle quotes
- , '<', '\'', (UChar)0x0028, '\'' // left parenthesis
- , '<', '\'', (UChar)0x0029, '\'' // right parenthesis
- , '<', '\'', (UChar)0x005b, '\'' // left bracket
- , '<', '\'', (UChar)0x005d, '\'' // right bracket
- , '<', '\'', (UChar)0x007b, '\'' // left brace
- , '<', '\'', (UChar)0x007d, '\'' // right brace
- , '<', (UChar)0x00a7 // section symbol
- , '<', (UChar)0x00b6 // paragraph symbol
- , '<', (UChar)0x00a9 // copyright symbol
- , '<', (UChar)0x00ae // registered trademark symbol
- , '<', '\'', (UChar)0x0040, '\'' // at sign
- , '<', (UChar)0x00a4 // international currency symbol
- , '<', (UChar)0x00a2 // cent sign
- , '<', '\'', (UChar)0x0024, '\'' // dollar sign
- , '<', (UChar)0x00a3 // pound-sterling sign
- , '<', (UChar)0x00a5 // yen sign
- , '<', '\'', (UChar)0x002a, '\'' // asterisk
- , '<', '\'', (UChar)0x005c, '\'' // backslash
- , '<', '\'', (UChar)0x0026, '\'' // ampersand
- , '<', '\'', (UChar)0x0023, '\'' // number sign
- , '<', '\'', (UChar)0x0025, '\'' // percent sign
- , '<', '\'', (UChar)0x002b, '\'' // plus sign
- // , '<', (UChar)0x002d // hyphen or minus sign
- , '<', (UChar)0x00b1 // plus-or-minus sign
- , '<', (UChar)0x00f7 // divide sign
- , '<', (UChar)0x00d7 // multiply sign
- , '<', '\'', (UChar)0x003c, '\'' // less-than sign
- , '<', '\'', (UChar)0x003d, '\'' // equal sign
- , '<', '\'', (UChar)0x003e, '\'' // greater-than sign
- , '<', (UChar)0x00ac // end of line symbol/logical NOT symbol
- , '<', '\'', (UChar)0x007c, '\'' // vertical line/logical OR symbol
- , '<', (UChar)0x00a6 // broken vertical line
- , '<', (UChar)0x00b0 // degree symbol
- , '<', (UChar)0x00b5 // micro symbol
-
- // NUMERICS
-
- , '<', '0', '<', '1', '<', '2', '<', '3', '<', '4', '<', '5', '<', '6', '<', '7', '<', '8', '<', '9'
- , '<', (UChar)0x00bc, '<', (UChar)0x00bd, '<', (UChar)0x00be // 1/4,1/2,3/4 fractions
-
- // NON-IGNORABLES
- , '<', 'a', ',', 'A'
- , '<', 'b', ',', 'B'
- , '<', 'c', ',', 'C'
- , '<', 'd', ',', 'D'
- , '<', (UChar)0x00F0, ',', (UChar)0x00D0 // eth
- , '<', 'e', ',', 'E'
- , '<', 'f', ',', 'F'
- , '<', 'g', ',', 'G'
- , '<', 'h', ',', 'H'
- , '<', 'i', ',', 'I'
- , '<', 'j', ',', 'J'
- , '<', 'k', ',', 'K'
- , '<', 'l', ',', 'L'
- , '<', 'm', ',', 'M'
- , '<', 'n', ',', 'N'
- , '<', 'o', ',', 'O'
- , '<', 'p', ',', 'P'
- , '<', 'q', ',', 'Q'
- , '<', 'r', ',', 'R'
- , '<', 's', ',', 'S', '&', 'S', 'S', ',', (UChar)0x00DF // s-zet
- , '<', 't', ',', 'T'
- , '&', 'T', 'H', ',', 0x00FE, '&', 'T', 'H', ',', (UChar)0x00DE // thorn
- , '<', 'u', ',', 'U'
- , '<', 'v', ',', 'V'
- , '<', 'w', ',', 'W'
- , '<', 'x', ',', 'X'
- , '<', 'y', ',', 'Y'
- , '<', 'z', ',', 'Z'
- , '&', 'A', 'E', ',', (UChar)0x00C6 // ae & AE ligature
- , '&', 'A', 'E', ',', (UChar)0x00E6
- , '&', 'O', 'E', ',', (UChar)0x0152 // oe & OE ligature
- , '&', 'O', 'E', ',', (UChar)0x0153
- , (UChar)0x0000
- };
-
-
-
- CollationDummyTest::CollationDummyTest()
- : myCollation(0)
- {
- UErrorCode status = U_ZERO_ERROR;
- UnicodeString rules(DEFAULTRULEARRAY);
- UnicodeString newRules("& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ");
-
- rules += newRules;
- myCollation = new RuleBasedCollator(rules, status);
- }
-
- CollationDummyTest::~CollationDummyTest()
- {
- delete myCollation;
- }
-
- const UChar CollationDummyTest::testSourceCases[][CollationDummyTest::MAX_TOKEN_LEN] = {
- {'a', 'b', '\'', 'c', 0},
- {'c', 'o', '-', 'o', 'p', 0},
- {'a', 'b', 0},
- {'a', 'm', 'p', 'e', 'r', 's', 'a', 'd', 0},
- {'a', 'l', 'l', 0},
- {'f', 'o', 'u', 'r', 0},
- {'f', 'i', 'v', 'e', 0},
- {'1', 0},
- {'1', 0},
- {'1', 0}, // 10
- {'2', 0},
- {'2', 0},
- {'H', 'e', 'l', 'l', 'o', 0},
- {'a', '<', 'b', 0},
- {'a', '<', 'b', 0},
- {'a', 'c', 'c', 0},
- {'a', 'c', 'H', 'c', 0}, // simple test
- {'p', 0x00EA, 'c', 'h', 'e', 0},
- {'a', 'b', 'c', 0},
- {'a', 'b', 'c', 0}, // 20
- {'a', 'b', 'c', 0},
- {'a', 'b', 'c', 0},
- {'a', 'b', 'c', 0},
- {'a', 0x00E6, 'c', 0},
- {'a', 'c', 'H', 'c', 0}, // primary test
- {'b', 'l', 'a', 'c', 'k', 0},
- {'f', 'o', 'u', 'r', 0},
- {'f', 'i', 'v', 'e', 0},
- {'1', 0},
- {'a', 'b', 'c', 0},
- {'a', 'b', 'c', 0}, // 30
- {'a', 'b', 'c', 'H', 0},
- {'a', 'b', 'c', 0},
- {'a', 'c', 'H', 'c', 0} // 33
- };
-
- const UChar CollationDummyTest::testTargetCases[][CollationDummyTest::MAX_TOKEN_LEN] = {
- {'a', 'b', 'c', '\'', 0},
- {'C', 'O', 'O', 'P', 0},
- {'a', 'b', 'c', 0},
- {'&', 0},
- {'&', 0},
- {'4', 0},
- {'5', 0},
- {'o', 'n', 'e', 0},
- {'n', 'n', 'e', 0},
- {'p', 'n', 'e', 0}, // 10
- {'t', 'w', 'o', 0},
- {'u', 'w', 'o', 0},
- {'h', 'e', 'l', 'l', 'O', 0},
- {'a', '<', '=', 'b', 0},
- {'a', 'b', 'c', 0},
- {'a', 'C', 'H', 'c', 0},
- {'a', 'C', 'H', 'c', 0}, // simple test
- {'p', 0x00E9, 'c', 'h', 0x00E9, 0},
- {'a', 'b', 'c', 0},
- {'a', 'B', 'C', 0}, // 20
- {'a', 'b', 'c', 'h', 0},
- {'a', 'b', 'd', 0},
- {0x00E4, 'b', 'c', 0},
- {'a', 0x00C6, 'c', 0},
- {'a', 'C', 'H', 'c', 0}, // primary test
- {'b', 'l', 'a', 'c', 'k', '-', 'b', 'i', 'r', 'd', 0},
- {'4', 0},
- {'5', 0},
- {'o', 'n', 'e', 0},
- {'a', 'b', 'c', 0},
- {'a', 'B', 'c', 0}, // 30
- {'a', 'b', 'c', 'h', 0},
- {'a', 'b', 'd', 0},
- {'a', 'C', 'H', 'c', 0} // 34
- };
-
- const Collator::EComparisonResult CollationDummyTest::results[] = {
- Collator::LESS,
- Collator::GREATER,
- Collator::LESS,
- Collator::LESS,
- Collator::LESS,
- Collator::LESS,
- Collator::LESS,
- Collator::GREATER,
- Collator::GREATER,
- Collator::LESS, // 10
- Collator::GREATER,
- Collator::LESS,
- Collator::GREATER,
- Collator::GREATER,
- Collator::LESS,
- Collator::LESS,
- Collator::LESS,
- // test primary > 17
- Collator::EQUAL,
- Collator::EQUAL,
- Collator::EQUAL, // 20
- Collator::LESS,
- Collator::LESS,
- Collator::EQUAL,
- Collator::EQUAL,
- Collator::EQUAL,
- Collator::LESS,
- // test secondary > 26
- Collator::EQUAL,
- Collator::EQUAL,
- Collator::EQUAL,
- Collator::EQUAL,
- Collator::EQUAL, // 30
- Collator::EQUAL,
- Collator::LESS,
- Collator::EQUAL // 34
- };
-
- const UChar CollationDummyTest::testCases[][CollationDummyTest::MAX_TOKEN_LEN] =
- {
- {'a', 0},
- {'A', 0},
- {0x00e4, 0},
- {0x00c4, 0},
- {'a', 'e', 0},
- {'a', 'E', 0},
- {'A', 'e', 0},
- {'A', 'E', 0},
- {0x00e6, 0},
- {0x00c6, 0},
- {'b', 0},
- {'c', 0},
- {'z', 0}
- };
-
- void CollationDummyTest::doTest( UnicodeString source, UnicodeString target, Collator::EComparisonResult result)
- {
- Collator::EComparisonResult compareResult = myCollation->compare(source, target);
- CollationKey sortKey1, sortKey2;
- UErrorCode key1status = U_ZERO_ERROR, key2status = U_ZERO_ERROR; //nos
- myCollation->getCollationKey(source, /*nos*/ sortKey1, key1status );
- myCollation->getCollationKey(target, /*nos*/ sortKey2, key2status );
- if (U_FAILURE(key1status) || U_FAILURE(key2status))
- {
- errln("SortKey generation Failed.\n");
- return;
- }
-
- Collator::EComparisonResult keyResult = sortKey1.compareTo(sortKey2);
- reportCResult( source, target, sortKey1, sortKey2, compareResult, keyResult, result );
- }
-
- void CollationDummyTest::TestTertiary( char* par )
- {
- int32_t i = 0;
- myCollation->setStrength(Collator::TERTIARY);
- for (i = 0; i < 17 ; i++)
- {
- doTest(testSourceCases[i], testTargetCases[i], results[i]);
- }
- }
- void CollationDummyTest::TestPrimary( char* par )
- {
- int32_t i;
- myCollation->setStrength(Collator::PRIMARY);
- for (i = 17; i < 26; i++)
- {
- doTest(testSourceCases[i], testTargetCases[i], results[i]);
- }
- }
-
- void CollationDummyTest::TestSecondary( char* par )
- {
- int32_t i;
- myCollation->setStrength(Collator::SECONDARY);
- for (i = 26; i < 34; i++)
- {
- doTest(testSourceCases[i], testTargetCases[i], results[i]);
- }
- }
-
- void CollationDummyTest::TestExtra( char* par )
- {
- int32_t i, j;
- myCollation->setStrength(Collator::TERTIARY);
- for (i = 0; i < 12; i++)
- {
- for (j = i + 1; j < 13; j += 1)
- {
- doTest(testCases[i], testCases[j], Collator::LESS);
- }
- }
- }
-
- void CollationDummyTest::runIndexedTest( int32_t index, bool_t exec, char* &name, char* par )
- {
- if (exec) logln("TestSuite CollationDummyTest: ");
- switch (index) {
- case 0: name = "TestPrimary"; if (exec) TestPrimary( par ); break;
- case 1: name = "TestSecondary"; if (exec) TestSecondary( par ); break;
- case 2: name = "TestTertiary"; if (exec) TestTertiary( par ); break;
- case 3: name = "TestExtra"; if (exec) TestExtra( par ); break;
- default: name = ""; break;
- }
- }
-
-