home *** CD-ROM | disk | FTP | other *** search
Wrap
/*
 *****************************************************************************************
 *                                                                                       *
 * COPYRIGHT:                                                                            *
 *   (C) Copyright Taligent, Inc., 1997                                                  *
 *   (C) Copyright International Business Machines Corporation, 1997-1998                *
 *   Licensed Material - Program-Property of IBM - All Rights Reserved.                  *
 *   US Government Users Restricted Rights - Use, duplication, or disclosure             *
 *   restricted by GSA ADP Schedule Contract with IBM Corp.                              *
 *                                                                                       *
 *****************************************************************************************
 */

#include "intltest.h"
#include "brkiter.h"
#include "unicode.h"
#include <stdio.h>
//#include "txbdapi.h" // BreakIteratorAPIC

//--------------------------------------------------------------------------------------
/**
 * "Vector" class for holding test tables
 * (this class is actually a linked list, but we use the name and API of the
 * java.util.Vector class to keep as much of our test code as possible the same.)
 */
class Enumeration { // text enumeration
public:
    // Enumerations created by Vector::elements() are deleted through an
    // Enumeration*, so the destructor has to be virtual; deleting a derived
    // object through a base pointer without one is undefined behavior.
    virtual ~Enumeration() {}

    // Returns true while nextElement() still has a string to hand out.
    virtual bool_t hasMoreElements() = 0;
    // Advances to the next string and returns a copy of it.
    virtual UnicodeString nextElement() = 0;
};

class Vector { // text vector
public:
    // One node of the list: the stored text plus the link to the following node.
    class TextLink {
    public:
        TextLink() : fLink(0), fText() {}
        TextLink(TextLink* link, UnicodeString text) : fLink(link), fText(text) {}

        TextLink* fLink;
        UnicodeString fText;
    };

public:
    TextLink fBase;   // sentinel node; the list is circular and always links back to it
    TextLink* fEnd;   // last real node, or &fBase while the list is empty
    int32_t fSize;    // number of stored strings

public:
    // Walks the list of one Vector from its sentinel until the sentinel is reached again.
    class VectorEnumeration : public Enumeration {
    public:
        VectorEnumeration(Vector* vector) : fVector(vector), fPos(&vector->fBase) {}

        bool_t hasMoreElements() { return fPos->fLink != &fVector->fBase; }
        UnicodeString nextElement() { fPos = fPos->fLink; return fPos->fText; }

        Vector* fVector;
        TextLink* fPos;
    };

    Vector() : fBase(), fEnd(&fBase), fSize(0) { fBase.fLink = &fBase; }

    // Frees every heap-allocated node; the sentinel is a member and is not deleted.
    ~Vector()
    {
        while (fBase.fLink != &fBase) {
            TextLink* link = fBase.fLink;
            fBase.fLink = link->fLink;
            delete link;
        }
    }

    // Appends a copy of 'text' at the end of the list.
    void addElement(UnicodeString text)
    {
        fEnd->fLink = new TextLink(&fBase, text);
        fEnd = fEnd->fLink;
        ++fSize;
    }

    // Returns the string at index 'pos', or an empty string when 'pos' is out of range.
    UnicodeString elementAt(int32_t pos)
    {
        // A negative index is rejected as well as an index past the end.
        if (pos < 0 || pos >= fSize)
            return UnicodeString();
        TextLink* link = fBase.fLink;
        while (pos-- > 0)
            link = link->fLink;
        return link->fText;
    }

    // Returns the most recently added string, or an empty string for an empty list.
    UnicodeString lastElement() { return fEnd == &fBase ? UnicodeString() : fEnd->fText; }

    int32_t size() { return fSize; }

    // Returns a new enumeration over this vector; the caller must delete it.
    Enumeration* elements() { return new VectorEnumeration(this); }
};

//--------------------------------------------------------------------------------------
/**
 * IntlTestTextBoundary is medium top level test class for everything in the directory "findword".
 */

#include "utypes.h"
#include "ittxtbd.h"
#include <string.h>
#include "schriter.h"

// Returns the value (0-15) of the hexadecimal digit 'c', or -1 if 'c' is not one.
static int hexDigitValue(char c)
{
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
    return -1;
}

// [HSYS] Just to make it easier to use with UChar array.
//
// Converts a char string to a UnicodeString. Every six-character sequence
// of the form backslash, 'u', four hex digits is replaced by the UChar with
// that value; every other char is appended as a single UChar.
//
// A backslash-u that is NOT followed by four hex digits is copied through
// literally instead of being parsed (the previous sscanf-based code used an
// uninitialized value in that case). Chars above 0x7F are widened through
// unsigned char so they are not sign-extended. A null pointer yields an
// empty string.
UnicodeString CharsToUnicodeString(const char* chars)
{
    UnicodeString buffer;
    if (chars == 0)
        return buffer;

    const int len = (int)strlen(chars);
    int i = 0;
    while (i < len) {
        // Need chars[i+2 .. i+5] to exist for a complete escape.
        if (chars[i] == '\\' && i + 5 < len && chars[i + 1] == 'u') {
            int unicode = 0;
            int digits = 0;
            while (digits < 4) {
                const int value = hexDigitValue(chars[i + 2 + digits]);
                if (value < 0)
                    break;
                unicode = (unicode << 4) | value;
                ++digits;
            }
            if (digits == 4) {
                buffer += (UChar)unicode;
                i += 6;
                continue;
            }
        }
        buffer += (UChar)(unsigned char)chars[i++];
    }
    return buffer;
}

// Characters used by the invariant tests; terminated by 0x0000.
const UChar IntlTestTextBoundary::cannedTestArray[] = {
    0x0001, 0x0002, 0x0003, 0x0004, ' ', '!', '\\', '"', '#', '$', '%', '&', '(', ')', '+', '-',
    '0', '1', '2', '3', '4', '<', '=', '>', 'A', 'B', 'C', 'D', 'E', '[', ']', '^', '_', '`',
    'a', 'b', 'c', 'd', 'e', '{', '}', '|', ',',
    0x00a0, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00a9, 0x00ab, 0x00ad,
    0x00ae, 0x00af, 0x00b0, 0x00b2, 0x00b3, 0x00b4, 0x00b9, 0x00bb, 0x00bc, 0x00bd,
    0x02b0, 0x02b1, 0x02b2, 0x02b3, 0x02b4,
    0x0300, 0x0301, 0x0302, 0x0303, 0x0304,
    0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4,
    0x0903, 0x093e, 0x093f, 0x0940, 0x0949,
    0x0f3a, 0x0f3b,
    0x2000, 0x2001, 0x2002, 0x200c, 0x200d, 0x200e, 0x200f, 0x2010, 0x2011, 0x2012,
    0x2028, 0x2029, 0x202a, 0x203e, 0x203f, 0x2040,
    0x20dd, 0x20de, 0x20df, 0x20e0,
    0x2160, 0x2161, 0x2162, 0x2163, 0x2164,
    0x0000
};

UnicodeString* IntlTestTextBoundary::cannedTestChars = 0;

//---------------------------------------------
// setup methods
//--------------------------------------------- IntlTestTextBoundary::IntlTestTextBoundary() { UnicodeString temp(cannedTestArray); cannedTestChars = new UnicodeString(); *cannedTestChars += 0x0000; *cannedTestChars += temp; addTestWordData(); addTestSentenceData(); addTestLineData(); addTestCharacterData(); } IntlTestTextBoundary::~IntlTestTextBoundary() { delete wordSelectionData; delete sentenceSelectionData; delete lineSelectionData; delete characterSelectionData; delete cannedTestChars; } /** * @bug 4097779 4098467 4117554 */ void IntlTestTextBoundary::addTestWordData() { wordSelectionData = new Vector(); wordSelectionData->addElement("12,34"); wordSelectionData->addElement(" "); wordSelectionData->addElement(UCharToUnicodeString((UChar)(0x00A2))); //cent sign wordSelectionData->addElement(UCharToUnicodeString((UChar)(0x00A3))); //pound sign wordSelectionData->addElement(UCharToUnicodeString((UChar)(0x00A4))); //currency sign wordSelectionData->addElement(UCharToUnicodeString((UChar)(0x00A5))); //yen sign wordSelectionData->addElement("alpha-beta-gamma"); wordSelectionData->addElement("."); wordSelectionData->addElement(" "); wordSelectionData->addElement("Badges"); wordSelectionData->addElement("?"); wordSelectionData->addElement(" "); wordSelectionData->addElement("BADGES"); wordSelectionData->addElement("!"); wordSelectionData->addElement("?"); wordSelectionData->addElement("!"); wordSelectionData->addElement(" "); wordSelectionData->addElement("We"); wordSelectionData->addElement(" "); wordSelectionData->addElement("don't"); wordSelectionData->addElement(" "); wordSelectionData->addElement("need"); wordSelectionData->addElement(" "); wordSelectionData->addElement("no"); wordSelectionData->addElement(" "); wordSelectionData->addElement("STINKING"); wordSelectionData->addElement(" "); wordSelectionData->addElement("BADGES"); wordSelectionData->addElement("!"); wordSelectionData->addElement("!"); wordSelectionData->addElement("!"); 
wordSelectionData->addElement("012.566,5"); wordSelectionData->addElement(" "); wordSelectionData->addElement("123.3434,900"); wordSelectionData->addElement(" "); wordSelectionData->addElement("1000,233,456.000"); wordSelectionData->addElement(" "); wordSelectionData->addElement("1,23.322%"); wordSelectionData->addElement(" "); wordSelectionData->addElement("123.1222"); wordSelectionData->addElement(" "); wordSelectionData->addElement("$123,000.20"); wordSelectionData->addElement(" "); wordSelectionData->addElement("179.01%"); wordSelectionData->addElement("Hello"); wordSelectionData->addElement(","); wordSelectionData->addElement(" "); wordSelectionData->addElement("how"); wordSelectionData->addElement(" "); wordSelectionData->addElement("are"); wordSelectionData->addElement(" "); wordSelectionData->addElement("you"); wordSelectionData->addElement(" "); wordSelectionData->addElement("X"); wordSelectionData->addElement(" "); wordSelectionData->addElement("Now"); wordSelectionData->addElement("\r"); wordSelectionData->addElement("is"); wordSelectionData->addElement("\n"); wordSelectionData->addElement("the"); wordSelectionData->addElement("\r\n"); wordSelectionData->addElement("time"); wordSelectionData->addElement("\n"); wordSelectionData->addElement("\r"); wordSelectionData->addElement("for"); wordSelectionData->addElement("\r"); wordSelectionData->addElement("\r"); wordSelectionData->addElement("all"); wordSelectionData->addElement(" "); // to test for bug #4097779 wordSelectionData->addElement(CharsToUnicodeString("aa\\u0300a")); wordSelectionData->addElement(" "); // to test for bug #4098467 // What follows is a string of Korean characters (I found it in the Yellow Pages // ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed // it correctly), first as precomposed syllables, and then as conjoining jamo. // Both sequences should be semantically identical and break the same way. // precomposed syllables... 
wordSelectionData->addElement(CharsToUnicodeString("\\uc0c1\\ud56d")); wordSelectionData->addElement(" "); wordSelectionData->addElement(CharsToUnicodeString("\\ud55c\\uc778")); wordSelectionData->addElement(" "); wordSelectionData->addElement(CharsToUnicodeString("\\uc5f0\\ud569")); wordSelectionData->addElement(" "); wordSelectionData->addElement(CharsToUnicodeString("\\uc7a5\\ub85c\\uad50\\ud68c")); wordSelectionData->addElement(" "); // conjoining jamo... wordSelectionData->addElement(CharsToUnicodeString("\\u1109\\u1161\\u11bc\\u1112\\u1161\\u11bc")); wordSelectionData->addElement(" "); wordSelectionData->addElement(CharsToUnicodeString("\\u1112\\u1161\\u11ab\\u110b\\u1175\\u11ab")); wordSelectionData->addElement(" "); wordSelectionData->addElement(CharsToUnicodeString("\\u110b\\u1167\\u11ab\\u1112\\u1161\\u11b8")); wordSelectionData->addElement(" "); wordSelectionData->addElement(CharsToUnicodeString("\\u110c\\u1161\\u11bc\\u1105\\u1169\\u1100\\u116d\\u1112\\u116c")); wordSelectionData->addElement(" "); // this is a test for bug #4117554: the ideographic iteration mark (U+3005) should // count as a Kanji character for the purposes of word breaking wordSelectionData->addElement("abc"); wordSelectionData->addElement(CharsToUnicodeString("\\u4e01\\u4e02\\u3005\\u4e03\\u4e03")); wordSelectionData->addElement("abc"); Enumeration *elems = wordSelectionData->elements(); testWordText = createTestData(elems); delete elems; } const UChar kParagraphSeparator = 0x2029; const UChar kLineSeparator = 0x2028; /** * @bug 4111338 4117554 4113835 */ void IntlTestTextBoundary::addTestSentenceData() { sentenceSelectionData = new Vector(); sentenceSelectionData->addElement("This is a simple sample sentence. "); sentenceSelectionData->addElement("(This is it.) "); sentenceSelectionData->addElement("This is a simple sample sentence. "); sentenceSelectionData->addElement("\"This isn\'t it.\" "); sentenceSelectionData->addElement("Hi! 
"); sentenceSelectionData->addElement("This is a simple sample sentence. "); sentenceSelectionData->addElement("It does not have to make any sense as you can see. "); sentenceSelectionData->addElement("Nel mezzo del cammin di nostra vita, mi ritrovai in una selva oscura. "); sentenceSelectionData->addElement("Che la dritta via aveo smarrita. "); sentenceSelectionData->addElement("He said, that I said, that you said!! "); sentenceSelectionData->addElement("Don't rock the boat." + UCharToUnicodeString(kParagraphSeparator)); sentenceSelectionData->addElement("Because I am the daddy, that is why. "); sentenceSelectionData->addElement("Not on my time (el timo.)! "); sentenceSelectionData->addElement("So what!!" + UCharToUnicodeString(kParagraphSeparator)); sentenceSelectionData->addElement("\"But now,\" he said, \"I know!\" "); sentenceSelectionData->addElement("Harris thumbed down several, including \"Away We Go\" (which became the huge success Oklahoma!). "); sentenceSelectionData->addElement("One species, B. anthracis, is highly virulent.\n"); sentenceSelectionData->addElement("Wolf said about Sounder:\"Beautifully thought-out and directed.\" "); sentenceSelectionData->addElement("Have you ever said, \"This is where\tI shall live\"? "); sentenceSelectionData->addElement("He answered, \"You may not!\" "); sentenceSelectionData->addElement("Another popular saying is: \"How do you do?\". "); sentenceSelectionData->addElement("Yet another popular saying is: \'I\'m fine thanks.\' "); sentenceSelectionData->addElement("What is the proper use of the abbreviation pp.? 
"); sentenceSelectionData->addElement("Yes, I am definatelly 12\" tall!!"); // test for bug #4113835: \n and \r count as spaces, not as paragraph breaks sentenceSelectionData->addElement(CharsToUnicodeString("Now\ris\nthe\r\ntime\n\rfor\r\rall\\u2029")); // test for bug #4111338: Don't break sentences at the boundary between CJK // and other letters sentenceSelectionData->addElement(CharsToUnicodeString("\\u5487\\u67ff\\ue591\\u5017\\u61b3\\u60a1\\u9510\\u8165:\"JAVA\\u821c") + CharsToUnicodeString("\\u8165\\u7fc8\\u51ce\\u306d,\\u2494\\u56d8\\u4ec0\\u60b1\\u8560\\u51ba") + CharsToUnicodeString("\\u611d\\u57b6\\u2510\\u5d46\".\\u2029")); sentenceSelectionData->addElement(CharsToUnicodeString("\\u5487\\u67ff\\ue591\\u5017\\u61b3\\u60a1\\u9510\\u8165\\u9de8") + CharsToUnicodeString("\\u97e4JAVA\\u821c\\u8165\\u7fc8\\u51ce\\u306d\\ue30b\\u2494\\u56d8\\u4ec0") + CharsToUnicodeString("\\u60b1\\u8560\\u51ba\\u611d\\u57b6\\u2510\\u5d46\\u97e5\\u7751\\u2029")); sentenceSelectionData->addElement(CharsToUnicodeString("\\u5487\\u67ff\\ue591\\u5017\\u61b3\\u60a1\\u9510\\u8165\\u9de8\\u97e4") + CharsToUnicodeString("\\u6470\\u8790JAVA\\u821c\\u8165\\u7fc8\\u51ce\\u306d\\ue30b\\u2494\\u56d8") + CharsToUnicodeString("\\u4ec0\\u60b1\\u8560\\u51ba\\u611d\\u57b6\\u2510\\u5d46\\u97e5\\u7751\\u2029")); sentenceSelectionData->addElement(CharsToUnicodeString("He said, \"I can go there.\"\\u2029")); // test for bug #4117554: Treat fullwidth variants of .!? 
the same as their // normal counterparts sentenceSelectionData->addElement(CharsToUnicodeString("I know I'm right\\uff0e ")); sentenceSelectionData->addElement(CharsToUnicodeString("Right\\uff1f ")); sentenceSelectionData->addElement(CharsToUnicodeString("Right\\uff01 ")); // test for bug #4117554: Don't break sentences at boundary between CJK and digits sentenceSelectionData->addElement(CharsToUnicodeString("\\u5487\\u67ff\\ue591\\u5017\\u61b3\\u60a1\\u9510\\u8165\\u9de8") + CharsToUnicodeString("\\u97e48888\\u821c\\u8165\\u7fc8\\u51ce\\u306d\\ue30b\\u2494\\u56d8\\u4ec0") + CharsToUnicodeString("\\u60b1\\u8560\\u51ba\\u611d\\u57b6\\u2510\\u5d46\\u97e5\\u7751\\u2029")); // test for bug #4117554: Break sentence between a sentence terminator and // opening punctuation sentenceSelectionData->addElement("no?"); sentenceSelectionData->addElement("(yes)" + CharsToUnicodeString("\\u2029")); // test for bug #4158381: Don't break sentence after period if it isn't // followed by a space sentenceSelectionData->addElement("Test <code>Flags.Flag</code> class. "); sentenceSelectionData->addElement("Another test." + CharsToUnicodeString("\\u2029")); // test for bug #4158381: No breaks when there are no terminators around sentenceSelectionData->addElement("<P>Provides a set of "lightweight" (all-java<FONT SIZE=\"-2\"><SUP>TM</SUP></FONT> language) components that, to the maximum degree possible, work the same on all platforms. "); sentenceSelectionData->addElement("Another test." + CharsToUnicodeString("\\u2029")); // test for bug #4143071: Make sure sentences that end with digits // work right sentenceSelectionData->addElement("Today is the 27th of May, 1998. "); sentenceSelectionData->addElement("Tomorrow with be 28 May 1998. "); sentenceSelectionData->addElement("The day after will be the 30th." 
+ CharsToUnicodeString("\\u2029")); // test for bug #4152416: Make sure sentences ending with a capital // letter are treated correctly sentenceSelectionData->addElement("The type of all primitive <code>boolean</code> values accessed in the target VM. "); sentenceSelectionData->addElement("Calls to xxx will return an implementor of this interface." + CharsToUnicodeString("\\u2029")); // test for bug #4152117: Make sure sentence breaking is handling // punctuation correctly [COULD NOT REPRODUCE THIS BUG, BUT TEST IS // HERE TO MAKE SURE IT DOESN'T CROP UP] sentenceSelectionData->addElement("Constructs a randomly generated BigInteger, uniformly distributed over the range <tt>0</tt> to <tt>(2<sup>numBits</sup> - 1)</tt>, inclusive. "); sentenceSelectionData->addElement("The uniformity of the distribution assumes that a fair source of random bits is provided in <tt>rnd</tt>. "); sentenceSelectionData->addElement("Note that this constructor always constructs a non-negative BigInteger." + CharsToUnicodeString("\\u2029")); Enumeration *elems = sentenceSelectionData->elements(); testSentenceText = createTestData(elems); delete elems; } /** * @bug 4068133 4086052 4035266 4097920 4098467 4117554 */ void IntlTestTextBoundary::addTestLineData() { lineSelectionData = new Vector(); lineSelectionData->addElement("Multi-"); lineSelectionData->addElement("Level "); lineSelectionData->addElement("example "); lineSelectionData->addElement("of "); lineSelectionData->addElement("a "); lineSelectionData->addElement("semi-"); lineSelectionData->addElement("idiotic "); lineSelectionData->addElement("non-"); lineSelectionData->addElement("sensical "); lineSelectionData->addElement("(non-"); lineSelectionData->addElement("important) "); lineSelectionData->addElement("sentence. 
"); lineSelectionData->addElement("Hi "); lineSelectionData->addElement("Hello "); lineSelectionData->addElement("How\n"); lineSelectionData->addElement("are\r"); lineSelectionData->addElement("you" + UCharToUnicodeString(kLineSeparator)); lineSelectionData->addElement("fine.\t"); lineSelectionData->addElement("good. "); lineSelectionData->addElement("Now\r"); lineSelectionData->addElement("is\n"); lineSelectionData->addElement("the\r\n"); lineSelectionData->addElement("time\n"); lineSelectionData->addElement("\r"); lineSelectionData->addElement("for\r"); lineSelectionData->addElement("\r"); lineSelectionData->addElement("all"); // to test for bug #4068133 lineSelectionData->addElement(CharsToUnicodeString("\\u96f6")); lineSelectionData->addElement(CharsToUnicodeString("\\u4e00\\u3002")); lineSelectionData->addElement(CharsToUnicodeString("\\u4e8c\\u3001")); lineSelectionData->addElement(CharsToUnicodeString("\\u4e09\\u3002\\u3001")); lineSelectionData->addElement(CharsToUnicodeString("\\u56db\\u3001\\u3002\\u3001")); lineSelectionData->addElement(CharsToUnicodeString("\\u4e94,")); lineSelectionData->addElement(CharsToUnicodeString("\\u516d.")); lineSelectionData->addElement(CharsToUnicodeString("\\u4e03.\\u3001,\\u3002")); lineSelectionData->addElement(CharsToUnicodeString("\\u516b")); // to test for bug #4086052 lineSelectionData->addElement(CharsToUnicodeString("foo\\u00a0bar ")); // lineSelectionData->addElement("foo\\ufeffbar"); // to test for bug #4097920 lineSelectionData->addElement("dog,"); lineSelectionData->addElement("cat,"); lineSelectionData->addElement("mouse "); lineSelectionData->addElement("(one)"); lineSelectionData->addElement("(two)\n"); // to test for bug #4035266 lineSelectionData->addElement("The "); lineSelectionData->addElement("balance "); lineSelectionData->addElement("is "); lineSelectionData->addElement("$-23,456.78, "); lineSelectionData->addElement("not "); lineSelectionData->addElement("-$32,456.78!\n"); // to test for bug #4098467 
// What follows is a string of Korean characters (I found it in the Yellow Pages
    // ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed
    // it correctly), first as precomposed syllables, and then as conjoining jamo.
    // Both sequences should be semantically identical and break the same way.
    // precomposed syllables...
    lineSelectionData->addElement(CharsToUnicodeString("\\uc0c1\\ud56d "));
    lineSelectionData->addElement(CharsToUnicodeString("\\ud55c\\uc778 "));
    lineSelectionData->addElement(CharsToUnicodeString("\\uc5f0\\ud569 "));
    lineSelectionData->addElement(CharsToUnicodeString("\\uc7a5\\ub85c\\uad50\\ud68c "));
    // conjoining jamo...
    lineSelectionData->addElement(CharsToUnicodeString("\\u1109\\u1161\\u11bc\\u1112\\u1161\\u11bc "));
    lineSelectionData->addElement(CharsToUnicodeString("\\u1112\\u1161\\u11ab\\u110b\\u1175\\u11ab "));
    lineSelectionData->addElement(CharsToUnicodeString("\\u110b\\u1167\\u11ab\\u1112\\u1161\\u11b8 "));
    lineSelectionData->addElement(CharsToUnicodeString("\\u110c\\u1161\\u11bc\\u1105\\u1169\\u1100\\u116d\\u1112\\u116c"));

    // to test for bug #4117554: Fullwidth .!? should be treated as postJwrd
    lineSelectionData->addElement(CharsToUnicodeString("\\u4e01\\uff0e"));
    lineSelectionData->addElement(CharsToUnicodeString("\\u4e02\\uff01"));
    lineSelectionData->addElement(CharsToUnicodeString("\\u4e03\\uff1f"));

    // The full test text is simply every expected piece concatenated in order.
    Enumeration *elems = lineSelectionData->elements();
    testLineText = createTestData(elems);
    delete elems;
}

/*
const UnicodeString graveS = "S" + (UChar)0x0300;
const UnicodeString acuteBelowI = "i" + UCharToUnicodeString(0x0317);
const UnicodeString acuteE = "e" + UCharToUnicodeString(0x0301);
const UnicodeString circumflexA = "a" + UCharToUnicodeString(0x0302);
const UnicodeString tildeE = "e" + UCharToUnicodeString(0x0303);
*/

/**
 * Fills characterSelectionData with the pieces a character-break iterator is
 * expected to produce, in order: base letters followed by a combining mark,
 * plain letters, line-end sequences, and Korean text in both precomposed and
 * conjoining-jamo form.
 *
 * @bug 4098467
 */
void IntlTestTextBoundary::addTestCharacterData()
{
    characterSelectionData = new Vector();

    characterSelectionData->addElement("S" + UCharToUnicodeString(0x0300)); //graveS
    // NOTE(review): labelled acuteBelowI, but the mark appended here is U+0301,
    // whereas the commented-out acuteBelowI constant above uses U+0317 -- confirm
    // which one was intended.
    characterSelectionData->addElement("i" + UCharToUnicodeString(0x0301)); // acuteBelowI
    characterSelectionData->addElement("m");
    characterSelectionData->addElement("p");
    characterSelectionData->addElement("l");
    characterSelectionData->addElement("e" + UCharToUnicodeString(0x0301)); // acuteE
    characterSelectionData->addElement(" ");
    characterSelectionData->addElement("s");
    characterSelectionData->addElement("a" + UCharToUnicodeString(0x0302)); // circumflexA
    characterSelectionData->addElement("m");
    characterSelectionData->addElement("p");
    characterSelectionData->addElement("l");
    characterSelectionData->addElement("e" + UCharToUnicodeString(0x0303)); // tildeE
    characterSelectionData->addElement(".");
    characterSelectionData->addElement("w");
    characterSelectionData->addElement("a" + UCharToUnicodeString(0x0302)); // circumflexA
    characterSelectionData->addElement("w");
    characterSelectionData->addElement("a");
    characterSelectionData->addElement("f");
    characterSelectionData->addElement("q");
    // "\r\n" is listed as a single expected piece, unlike the lone "\n" and "\r".
    characterSelectionData->addElement("\n");
    characterSelectionData->addElement("\r");
    characterSelectionData->addElement("\r\n");
    characterSelectionData->addElement("\n");

    // to test for bug #4098467
    // What follows is a string of Korean characters (I found it in the Yellow Pages
    // ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed
    // it correctly), first as precomposed syllables, and then as conjoining jamo.
    // Both sequences should be semantically identical and break the same way.
    // precomposed syllables...
    characterSelectionData->addElement(CharsToUnicodeString("\\uc0c1"));
    characterSelectionData->addElement(CharsToUnicodeString("\\ud56d"));
    characterSelectionData->addElement(" ");
    characterSelectionData->addElement(CharsToUnicodeString("\\ud55c"));
    characterSelectionData->addElement(CharsToUnicodeString("\\uc778"));
    characterSelectionData->addElement(" ");
    characterSelectionData->addElement(CharsToUnicodeString("\\uc5f0"));
    characterSelectionData->addElement(CharsToUnicodeString("\\ud569"));
    characterSelectionData->addElement(" ");
    characterSelectionData->addElement(CharsToUnicodeString("\\uc7a5"));
    characterSelectionData->addElement(CharsToUnicodeString("\\ub85c"));
    characterSelectionData->addElement(CharsToUnicodeString("\\uad50"));
    characterSelectionData->addElement(CharsToUnicodeString("\\ud68c"));
    characterSelectionData->addElement(" ");
    // conjoining jamo...
characterSelectionData->addElement(CharsToUnicodeString("\\u1109\\u1161\\u11bc")); characterSelectionData->addElement(CharsToUnicodeString("\\u1112\\u1161\\u11bc")); characterSelectionData->addElement(" "); characterSelectionData->addElement(CharsToUnicodeString("\\u1112\\u1161\\u11ab")); characterSelectionData->addElement(CharsToUnicodeString("\\u110b\\u1175\\u11ab")); characterSelectionData->addElement(" "); characterSelectionData->addElement(CharsToUnicodeString("\\u110b\\u1167\\u11ab")); characterSelectionData->addElement(CharsToUnicodeString("\\u1112\\u1161\\u11b8")); characterSelectionData->addElement(" "); characterSelectionData->addElement(CharsToUnicodeString("\\u110c\\u1161\\u11bc")); characterSelectionData->addElement(CharsToUnicodeString("\\u1105\\u1169")); characterSelectionData->addElement(CharsToUnicodeString("\\u1100\\u116d")); characterSelectionData->addElement(CharsToUnicodeString("\\u1112\\u116c")); Enumeration *elems = characterSelectionData->elements(); testCharacterText = createTestData(elems); delete elems; } UnicodeString IntlTestTextBoundary::createTestData(Enumeration* e) { UnicodeString result = ""; while (e->hasMoreElements()) { result += e->nextElement(); } return result; } //--------------------------------------------- // SentenceBreak tests //--------------------------------------------- void IntlTestTextBoundary::TestForwardSentenceSelection() { BreakIterator* e = BreakIterator::createSentenceInstance(); doForwardSelectionTest(*e, testSentenceText, sentenceSelectionData); delete e; } void IntlTestTextBoundary::TestFirstSentenceSelection() { BreakIterator* e = BreakIterator::createSentenceInstance(); doFirstSelectionTest(*e, testSentenceText, sentenceSelectionData); delete e; } void IntlTestTextBoundary::TestLastSentenceSelection() { BreakIterator* e = BreakIterator::createSentenceInstance(); doLastSelectionTest(*e, testSentenceText, sentenceSelectionData); delete e; } void IntlTestTextBoundary::TestBackwardSentenceSelection() { 
BreakIterator* e = BreakIterator::createSentenceInstance(); doBackwardSelectionTest(*e, testSentenceText, sentenceSelectionData); delete e; } void IntlTestTextBoundary::TestForwardSentenceIndexSelection() { BreakIterator* e = BreakIterator::createSentenceInstance(); doForwardIndexSelectionTest(*e, testSentenceText, sentenceSelectionData); delete e; } void IntlTestTextBoundary::TestBackwardSentenceIndexSelection() { BreakIterator* e = BreakIterator::createSentenceInstance(); doBackwardIndexSelectionTest(*e, testSentenceText, sentenceSelectionData); delete e; } void IntlTestTextBoundary::TestSentenceMultipleSelection() { BreakIterator* e = BreakIterator::createSentenceInstance(); doMultipleSelectionTest(*e, testSentenceText); delete e; } void IntlTestTextBoundary::TestSentenceInvariants() { BreakIterator *e = BreakIterator::createSentenceInstance(); UnicodeString s = *cannedTestChars + CharsToUnicodeString(".,\\u3001\\u3002\\u3041\\u3042\\u3043\\ufeff"); doOtherInvariantTest(*e, s); delete e; } //--------------------------------------------- // WordBreak tests //--------------------------------------------- void IntlTestTextBoundary::TestForwardWordSelection() { BreakIterator* e = BreakIterator::createWordInstance(); doForwardSelectionTest(*e, testWordText, wordSelectionData); delete e; } void IntlTestTextBoundary::TestFirstWordSelection() { BreakIterator* e = BreakIterator::createWordInstance(); doFirstSelectionTest(*e, testWordText, wordSelectionData); delete e; } void IntlTestTextBoundary::TestLastWordSelection() { BreakIterator* e = BreakIterator::createWordInstance(); doLastSelectionTest(*e, testWordText, wordSelectionData); delete e; } void IntlTestTextBoundary::TestBackwardWordSelection() { BreakIterator* e = BreakIterator::createWordInstance(); doBackwardSelectionTest(*e, testWordText, wordSelectionData); delete e; } void IntlTestTextBoundary::TestForwardWordIndexSelection() { BreakIterator* e = BreakIterator::createWordInstance(); 
doForwardIndexSelectionTest(*e, testWordText, wordSelectionData); delete e; } void IntlTestTextBoundary::TestBackwardWordIndexSelection() { BreakIterator* e = BreakIterator::createWordInstance(); doBackwardIndexSelectionTest(*e, testWordText, wordSelectionData); delete e; } void IntlTestTextBoundary::TestWordMultipleSelection() { BreakIterator* e = BreakIterator::createWordInstance(); doMultipleSelectionTest(*e, testWordText); delete e; } void IntlTestTextBoundary::TestWordInvariants() { BreakIterator *e = BreakIterator::createWordInstance(); UnicodeString s = *cannedTestChars + CharsToUnicodeString("\',.\\u3041\\u3042\\u3043\\u309b\\u309c\\u30a1\\u30a2\\u30a3\\u4e00\\u4e01\\u4e02"); doBreakInvariantTest(*e, s); s = *cannedTestChars + CharsToUnicodeString("\',.\\u3041\\u3042\\u3043\\u309b\\u309c\\u30a1\\u30a2\\u30a3\\u4e00\\u4e01\\u4e02"); doOtherInvariantTest(*e, s); delete e; } //--------------------------------------------- // LineBreak tests //--------------------------------------------- void IntlTestTextBoundary::TestForwardLineSelection() { BreakIterator* e = BreakIterator::createLineInstance(); doForwardSelectionTest(*e, testLineText, lineSelectionData); delete e; } void IntlTestTextBoundary::TestFirstLineSelection() { BreakIterator* e = BreakIterator::createLineInstance(); doFirstSelectionTest(*e, testLineText, lineSelectionData); delete e; } void IntlTestTextBoundary::TestLastLineSelection() { BreakIterator* e = BreakIterator::createLineInstance(); doLastSelectionTest(*e, testLineText, lineSelectionData); delete e; } void IntlTestTextBoundary::TestBackwardLineSelection() { BreakIterator* e = BreakIterator::createLineInstance(); doBackwardSelectionTest(*e, testLineText, lineSelectionData); delete e; } void IntlTestTextBoundary::TestForwardLineIndexSelection() { BreakIterator* e = BreakIterator::createLineInstance(); doForwardIndexSelectionTest(*e, testLineText, lineSelectionData); delete e; } void IntlTestTextBoundary::TestBackwardLineIndexSelection() { 
BreakIterator* e = BreakIterator::createLineInstance(); doBackwardIndexSelectionTest(*e, testLineText, lineSelectionData); delete e; } void IntlTestTextBoundary::TestLineMultipleSelection() { BreakIterator* e = BreakIterator::createLineInstance(); doMultipleSelectionTest(*e, testLineText); delete e; } void IntlTestTextBoundary::TestLineInvariants() { BreakIterator *e = BreakIterator::createLineInstance(); UnicodeString s = CharsToUnicodeString(".,;:\\u3001\\u3002\\u3041\\u3042\\u3043\\u3044\\u3045\\u30a3\\u4e00\\u4e01\\u4e02"); UnicodeString testChars = *cannedTestChars + s; doBreakInvariantTest(*e, testChars); doOtherInvariantTest(*e, testChars); int errorCount = 0; UTextOffset i, j, k; // in addition to the other invariants, a line-break iterator should make sure that: // it doesn't break around the non-breaking characters UnicodeString noBreak = CharsToUnicodeString("\\u00a0\\u2007\\u2011\\ufeff"); UnicodeString work("aaa"); for (i = 0; i < testChars.size(); i++) { UChar c = testChars[i]; if (c == '\r' || c == '\n' || c == 0x2029 || c == 0x2028 || c == 0x0003) continue; work[0] = c; for (j = 0; j < noBreak.size(); j++) { work[1] = noBreak[j]; for (k = 0; k < testChars.size(); k++) { work[2] = testChars[k]; e->setText(&work); for (int l = e->first(); l != BreakIterator::DONE; l = e->next()) if (l == 1 || l == 2) { errln("Got break between U+" + UCharToUnicodeString(work[l - 1]) + " and U+" + UCharToUnicodeString(work[l])); errorCount++; if (errorCount >= 75) return; } } } } // it does break after hyphens (unless they're followed by a digit, a non-spacing mark, // a currency symbol, a non-breaking space, or a line or paragraph separator) UnicodeString dashes = CharsToUnicodeString("-\\u00ad\\u2010\\u2012\\u2013\\u2014"); for (i = 0; i < testChars.size(); i++) { work[0] = testChars[i]; for (j = 0; j < dashes.size(); j++) { work[1] = dashes[j]; for (k = 0; k < testChars.size(); k++) { UChar c = testChars[k]; if (Unicode::getType(c) == Unicode::DECIMAL_DIGIT_NUMBER 
|| Unicode::getType(c) == Unicode::OTHER_NUMBER || Unicode::getType(c) == Unicode::NON_SPACING_MARK || Unicode::getType(c) == Unicode::ENCLOSING_MARK || Unicode::getType(c) == Unicode::CURRENCY_SYMBOL || c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029 || c == 0x0003 || c == 0x00a0 || c == 0x2007 || c == 0x2011 || c == 0xfeff) continue; work[2] = c; e->setText(&work); bool_t saw2 = FALSE; for (int l = e->first(); l != BreakIterator::DONE; l = e->next()) if (l == 2) saw2 = TRUE; if (!saw2) { errln("Didn't get break between U+" + UCharToUnicodeString(work[1]) + " and U+" + UCharToUnicodeString(work[2])); errorCount++; if (errorCount >= 75) return; } } } } } //--------------------------------------------- // CharacterBreak tests //--------------------------------------------- void IntlTestTextBoundary::TestForwardCharacterSelection() { BreakIterator* e = BreakIterator::createCharacterInstance(); doForwardSelectionTest(*e, testCharacterText, characterSelectionData); delete e; } void IntlTestTextBoundary::TestFirstCharacterSelection() { BreakIterator* e = BreakIterator::createCharacterInstance(); doFirstSelectionTest(*e, testCharacterText, characterSelectionData); delete e; } void IntlTestTextBoundary::TestLastCharacterSelection() { BreakIterator* e = BreakIterator::createCharacterInstance(); doLastSelectionTest(*e, testCharacterText, characterSelectionData); delete e; } void IntlTestTextBoundary::TestBackwardCharacterSelection() { BreakIterator* e = BreakIterator::createCharacterInstance(); doBackwardSelectionTest(*e, testCharacterText, characterSelectionData); delete e; } void IntlTestTextBoundary::TestForwardCharacterIndexSelection() { BreakIterator* e = BreakIterator::createCharacterInstance(); doForwardIndexSelectionTest(*e, testCharacterText, characterSelectionData); delete e; } void IntlTestTextBoundary::TestBackwardCharacterIndexSelection() { BreakIterator* e = BreakIterator::createCharacterInstance(); doBackwardIndexSelectionTest(*e, testCharacterText, 
characterSelectionData); delete e; } void IntlTestTextBoundary::TestCharacterMultipleSelection() { BreakIterator* e = BreakIterator::createCharacterInstance(); doMultipleSelectionTest(*e, testCharacterText); delete e; } void IntlTestTextBoundary::TestCharacterInvariants() { BreakIterator *e = BreakIterator::createCharacterInstance(); UnicodeString s = *cannedTestChars + CharsToUnicodeString("\\u1100\\u1101\\u1102\\u1160\\u1161\\u1162\\u11a8\\u11a9\\u11aa"); doBreakInvariantTest(*e, s); s = *cannedTestChars + CharsToUnicodeString("\\u1100\\u1101\\u1102\\u1160\\u1161\\u1162\\u11a8\\u11a9\\u11aa"); doOtherInvariantTest(*e, s); delete e; } //--------------------------------------------- // other tests //--------------------------------------------- void IntlTestTextBoundary::TestEmptyString() { UnicodeString text = ""; Vector x; x.addElement(text); BreakIterator* bi = BreakIterator::createLineInstance(); doForwardSelectionTest(*bi, text, &x); doFirstSelectionTest(*bi, text, &x); doLastSelectionTest(*bi, text, &x); doBackwardSelectionTest(*bi, text, &x); doForwardIndexSelectionTest(*bi, text, &x); doBackwardIndexSelectionTest(*bi, text, &x); delete bi; } void IntlTestTextBoundary::TestGetAvailableLocales() { int32_t locCount = 0; const Locale* locList = BreakIterator::getAvailableLocales(locCount); if (locCount == 0) errln("getAvailableLocales() returned an empty list!"); // I have no idea how to test this function... 
} /** * @bug 4095322 */ void IntlTestTextBoundary::TestJapaneseLineBreak() { UnicodeString testString = CharsToUnicodeString("\\u4e00x\\u4e8c"); UnicodeString precedingChars = CharsToUnicodeString("([{\\u00ab$\\u00a5\\u00a3\\u00a4\\u2018\\u201a\\u201c\\u201e\\u201b\\u201f"); UnicodeString followingChars = CharsToUnicodeString(")]}\\u00bb!%,.\\u3001\\u3002\\u3063\\u3083\\u3085\\u3087\\u30c3\\u30e3\\u30e5\\u30e7\\u30fc:;\\u309b\\u309c\\u3005\\u309d\\u309e\\u30fd\\u30fe\\u2019\\u201d\\u00b0\\u2032\\u2033\\u2034\\u2030\\u2031\\u2103\\u2109\\u00a2\\u0300\\u0301\\u0302"); BreakIterator *iter = BreakIterator::createLineInstance(Locale::JAPAN); StringCharacterIterator* it = new StringCharacterIterator(testString); UTextOffset i; for (i = 0; i < precedingChars.size(); i++) { testString[1] = precedingChars[i]; iter->adoptText(it); int32_t j = iter->first(); if (j != 0) errln("ja line break failure: failed to start at 0"); j = iter->next(); if (j != 1) errln("ja line break failure: failed to stop before '" + UCharToUnicodeString(precedingChars[i]) + "' (" + ((int)(precedingChars[i])) + ")"); j = iter->next(); if (j != 3) errln("ja line break failure: failed to skip position after '" + UCharToUnicodeString(precedingChars[i]) + "' (" + ((int)(precedingChars[i])) + ")"); } for (i = 0; i < followingChars.size(); i++) { testString[1] = followingChars[i]; it = new StringCharacterIterator(testString); iter->adoptText(it); int j = iter->first(); if (j != 0) errln("ja line break failure: failed to start at 0"); j = iter->next(); if (j != 2) errln("ja line break failure: failed to skip position before '" + UCharToUnicodeString(followingChars[i]) + "' (" + ((int)(followingChars[i])) + ")"); j = iter->next(); if (j != 3) errln("ja line break failure: failed to stop after '" + UCharToUnicodeString(followingChars[i]) + "' (" + ((int)(followingChars[i])) + ")"); } delete iter; } // [serialization test has been removed pursuant to bug #4152965] void IntlTestTextBoundary::TestGetDisplayName() 
{ UnicodeString result; BreakIterator::getDisplayName(Locale::US, result); if (Locale::getDefault() == Locale::US && result != "English (United States)") errln("BreakIterator::getDisplayName() failed: expected \"English (United States)\", got \"" + result); BreakIterator::getDisplayName(Locale::FRANCE, Locale::US, result); if (result != "French (France)") errln("BreakIterator::getDisplayName() failed: expected \"French (France)\", got \"" + result); } /** * @bug 4068137 */ void IntlTestTextBoundary::TestEndBehavior() { UnicodeString testString("boo."); BreakIterator *wb = BreakIterator::createWordInstance(); wb->setText(&testString); if (wb->first() != 0) errln("Didn't get break at beginning of string."); if (wb->next() != 3) errln("Didn't get break before period in \"boo.\""); if (wb->current() != 4 && wb->next() != 4) errln("Didn't get break at end of string."); delete wb; } //--------------------------------------------- // runIndexedTest //--------------------------------------------- void IntlTestTextBoundary::runIndexedTest( int32_t index, bool_t exec, char* &name, char* par ) { if (exec) logln("TestSuite TextBoundary: "); switch (index) { case 0: name = "TestForwardSentenceSelection"; if (exec) TestForwardSentenceSelection(); break; case 1: name = "TestFirstSentenceSelection"; if (exec) TestFirstSentenceSelection(); break; case 2: name = "TestLastSentenceSelection"; if (exec) TestLastSentenceSelection(); break; case 3: name = "TestBackwardSentenceSelection"; if (exec) TestBackwardSentenceSelection(); break; case 4: name = "TestForwardSentenceIndexSelection"; if (exec) TestForwardSentenceIndexSelection(); break; case 5: name = "TestBackwardSentenceIndexSelection"; if (exec) TestBackwardSentenceIndexSelection(); break; case 6: name = "TestSentenceMultipleSelection"; if (exec) TestSentenceMultipleSelection(); break; case 7: name = "TestForwardWordSelection"; if (exec) TestForwardWordSelection(); break; case 8: name = "TestFirstWordSelection"; if (exec) 
TestFirstWordSelection(); break; case 9: name = "TestLastWordSelection"; if (exec) TestLastWordSelection(); break; case 10: name = "TestBackwardWordSelection"; if (exec) TestBackwardWordSelection(); break; case 11: name = "TestForwardWordIndexSelection"; if (exec) TestForwardWordIndexSelection(); break; case 12: name = "TestBackwardWordIndexSelection"; if (exec) TestBackwardWordIndexSelection(); break; case 13: name = "TestWordMultipleSelection"; if (exec) TestWordMultipleSelection(); break; case 14: name = "TestForwardLineSelection"; if (exec) TestForwardLineSelection(); break; case 15: name = "TestFirstLineSelection"; if (exec) TestFirstLineSelection(); break; case 16: name = "TestLastLineSelection"; if (exec) TestLastLineSelection(); break; case 17: name = "TestBackwardLineSelection"; if (exec) TestBackwardLineSelection(); break; case 18: name = "TestForwardLineIndexSelection"; if (exec) TestForwardLineIndexSelection(); break; case 19: name = "TestBackwardLineIndexSelection"; if (exec) TestBackwardLineIndexSelection(); break; case 20: name = "TestLineMultipleSelection"; if (exec) TestLineMultipleSelection(); break; case 21: name = "TestForwardCharacterSelection"; if (exec) TestForwardCharacterSelection(); break; case 22: name = "TestFirstCharacterSelection"; if (exec) TestFirstCharacterSelection(); break; case 23: name = "TestLastCharacterSelection"; if (exec) TestLastCharacterSelection(); break; case 24: name = "TestBackwardCharacterSelection"; if (exec) TestBackwardCharacterSelection(); break; case 25: name = "TestForwardCharacterIndexSelection"; if (exec) TestForwardCharacterIndexSelection(); break; case 26: name = "TestBackwardCharacterIndexSelection"; if (exec) TestBackwardCharacterIndexSelection(); break; case 27: name = "TestCharacterMultipleSelection"; if (exec) TestCharacterMultipleSelection(); break; case 28: name = "TestEmptyString"; if (exec) TestEmptyString(); break; case 29: name = "TestGetAvailableLocales"; if (exec) TestGetAvailableLocales(); 
break; case 30: name = "TestGetDisplayName"; if (exec) TestGetDisplayName(); break; case 31: name = "TestPreceding"; if (exec) TestPreceding(); break; case 32: name = "TestBug4153072"; if (exec) TestBug4153072(); break; /* case 33: name = "BreakIteratorCAPI"; if (exec) { logln("BreakIterator C API test---"); logln(""); IntlTestBreakIteratorFormatU_CAPI test; callTest( test, par ); } break; */ default: name = ""; break; //needed to end loop } } //--------------------------------------------- // Test implementation routines //--------------------------------------------- void IntlTestTextBoundary::doForwardSelectionTest(BreakIterator& iterator, UnicodeString& testText, Vector* result) { int32_t forwardSelectionCounter = 0; int32_t forwardSelectionOffset = 0; CharacterIterator *itSource = 0; CharacterIterator *itTarget = 0; logln("doForwardSelectionTest text of length: "+testText.size()); // check to make sure setText() and getText() work right iterator.setText(&testText); itSource = iterator.createText(); itTarget = new StringCharacterIterator(testText); if (*itSource != *itTarget) errln("createText() didn't return what we passed to setText!"); delete itSource; delete itTarget; UnicodeString expectedResult; UnicodeString selectionResult; int32_t lastOffset = iterator.first(); int32_t offset = iterator.next(); while(offset != BreakIterator::DONE && forwardSelectionCounter < result->size()) { if (offset != iterator.current()) errln((UnicodeString)"current() failed: it returned " + iterator.current() + " and offset was " + offset); expectedResult = result->elementAt(forwardSelectionCounter); forwardSelectionOffset += expectedResult.size(); testText.extractBetween(lastOffset, offset, selectionResult); if (offset != forwardSelectionOffset) { errln((UnicodeString)"\n*** Selection #" + forwardSelectionCounter + "\nExpected : " + expectedResult + " - length : " + expectedResult.size() + "\nSelected : " + selectionResult + " - length : " + selectionResult.size()); } 
logln((UnicodeString)"#" + forwardSelectionCounter + " ["+lastOffset+", "+offset+"] : " + selectionResult); forwardSelectionCounter++; lastOffset = offset; offset = iterator.next(); } if (forwardSelectionCounter < result->size() - 1) errln((UnicodeString)"\n*** Selection #" + forwardSelectionCounter + " not found at offset "+offset+"!!!"); else if (forwardSelectionCounter >= result->size() && offset != BreakIterator::DONE) errln((UnicodeString)"\n*** Selection #" + forwardSelectionCounter + " should not exist at offset "+offset+"!!!"); } void IntlTestTextBoundary::doBackwardSelectionTest(BreakIterator& iterator, UnicodeString& testText, Vector* result) { int32_t backwardSelectionCounter = (result->size() - 1); int32_t neededOffset = testText.size(); int32_t lastOffset = iterator.last(); iterator.setText(&testText); int32_t offset = iterator.previous(); UnicodeString expectedResult; UnicodeString selectionResult; while(offset != BreakIterator::DONE) { expectedResult = (UnicodeString)result->elementAt(backwardSelectionCounter); neededOffset -= expectedResult.size(); testText.extractBetween(offset, lastOffset, selectionResult); if(offset != neededOffset) { errln( (UnicodeString)"\n*** Selection #" + backwardSelectionCounter + "\nExpected "+neededOffset+"> " + expectedResult + " <" + "\nSelected "+offset+"> " + selectionResult + " <"); } logln((UnicodeString)"#" + backwardSelectionCounter + " : " + selectionResult); backwardSelectionCounter--; lastOffset = offset; offset = iterator.previous(); } if (backwardSelectionCounter >= 0 && offset != BreakIterator::DONE) errln((UnicodeString)"*** Selection #" + backwardSelectionCounter + " not found!!!"); } void IntlTestTextBoundary::doFirstSelectionTest(BreakIterator& iterator, UnicodeString& testText, Vector* result) { bool_t success = TRUE; UnicodeString expectedFirstSelection; UnicodeString tempFirst; iterator.setText(&testText); int32_t selectionStart = iterator.first(); int32_t selectionEnd = iterator.next(); 
if(selectionEnd != BreakIterator::DONE) { testText.extractBetween(selectionStart, selectionEnd, tempFirst); expectedFirstSelection = result->elementAt(0); if(tempFirst != expectedFirstSelection) { errln( (UnicodeString)"\n\n" + "### Error in TestFindWord::doFirstSelectionTest. First selection not equal to what expected." + "\nExpexcted : " + expectedFirstSelection + " - length : " + expectedFirstSelection.size() + "\nSelected : " + tempFirst + " - length : " + tempFirst.size() + "\n"); success = FALSE; } } else if (selectionStart != 0 || testText.size() != 0) { errln((UnicodeString)"\n### Error in TTestFindWord::doFirstSelectionTest. Could not get first selection.\n"+ "start = "+selectionStart+" end = "+selectionEnd); success = FALSE; } if(success) { logln( (UnicodeString)"IntlTestTextBoundary::doFirstSelectionTest \n" + "\nExpexcted first selection: " + expectedFirstSelection + "\nCalculated first selection: " + tempFirst + " is correct\n"); } } void IntlTestTextBoundary::doLastSelectionTest(BreakIterator& iterator, UnicodeString& testText, Vector* result) { bool_t success = TRUE; UnicodeString expectedLastSelection; UnicodeString tempLast; iterator.setText(&testText); int32_t selectionEnd = iterator.last(); int32_t selectionStart = iterator.previous(); if(selectionStart != BreakIterator::DONE) { testText.extractBetween(selectionStart, selectionEnd, tempLast); expectedLastSelection = result->lastElement(); if(tempLast != expectedLastSelection) { errln( (UnicodeString)"\n\n" + "### Error in TTestFindWord::doLastSelectionTest. Last selection not equal to what expected." + "\nExpexcted : " + expectedLastSelection + " - length : " + expectedLastSelection.size() + "\nSelected : " + tempLast + " - length : " + tempLast.size() + "\n"); success = FALSE; } } else if (selectionEnd != 0 || testText.size() != 0) { errln((UnicodeString)"\n### Error in TTestFindWord::doLastSelectionTest. 
Could not get last selection."+ "["+selectionStart+","+selectionEnd+"]"); success = FALSE; } if(success) { logln( (UnicodeString)"TTestFindWord::doLastSelectionTest \n" + "\nExpexcted last selection: " + expectedLastSelection + "\nCalculated last selection: " + tempLast + "\n"); } } /** * @bug 4052418 4068139 */ void IntlTestTextBoundary::doForwardIndexSelectionTest(BreakIterator& iterator, UnicodeString& testText, Vector* result) { int32_t arrayCount = result->size(); int32_t textLength = testText.size(); iterator.setText(&testText); for(UTextOffset offset = 0; offset < textLength; offset++) { int32_t selBegin = iterator.preceding(offset); int32_t selEnd = iterator.following(offset); bool_t isBound = iterator.isBoundary(offset); int32_t entry = 0; int32_t pos = 0; if (selBegin != BreakIterator::DONE) { while (pos < selBegin && entry < arrayCount) { pos += ((UnicodeString)(result->elementAt(entry))).size(); ++entry; } if (pos != selBegin) { errln((UnicodeString)"With offset = " + offset + ", got back spurious " + selBegin + " from preceding."); continue; } else { pos += ((UnicodeString)(result->elementAt(entry))).size(); ++entry; } } if (isBound) { if (pos != offset) { errln((UnicodeString)"isBoundary() erroneously returned true with offset = " + offset); continue; } else { pos += ((UnicodeString)(result->elementAt(entry))).size(); ++entry; } } if (pos != selEnd) { errln((UnicodeString)"With offset = " + offset + ", got back erroneous " + selEnd + " from following."); continue; } } } /** * @bug 4052418 4068139 */ void IntlTestTextBoundary::doBackwardIndexSelectionTest(BreakIterator& iterator, UnicodeString& testText, Vector* result) { int32_t arrayCount = result->size(); int32_t textLength = testText.size(); iterator.setText(&testText); for(UTextOffset offset = textLength - 1; offset >= 0; offset--) { int32_t selBegin = iterator.preceding(offset); int32_t selEnd = iterator.following(offset); bool_t isBound = iterator.isBoundary(offset); int32_t entry = 0; int32_t 
pos = 0; if (selBegin != BreakIterator::DONE) { while (pos < selBegin && entry < arrayCount) { pos += ((UnicodeString)(result->elementAt(entry))).size(); ++entry; } if (pos != selBegin) { errln((UnicodeString)"With offset = " + offset + ", got back spurious " + selBegin + " from preceding."); continue; } else { pos += ((UnicodeString)(result->elementAt(entry))).size(); ++entry; } } if (isBound) { if (pos != offset) { errln((UnicodeString)"isBoundary() erroneously returned true with offset = " + offset); continue; } else { pos += ((UnicodeString)(result->elementAt(entry))).size(); ++entry; } } if (pos != selEnd) { errln((UnicodeString)"With offset = " + offset + ", got back erroneous " + selEnd + " from following."); continue; } } } /* * @bug 4153072 */ void IntlTestTextBoundary::TestBug4153072() { BreakIterator *iter = BreakIterator::createWordInstance(); UnicodeString str("...Hello, World!..."); int32_t begin = 3; int32_t end = str.size() - 3; bool_t gotException = FALSE; bool_t dummy; StringCharacterIterator textIterator(str, begin, end, begin); iter->adoptText(&textIterator); for (int index = -1; index < begin + 1; ++index) { dummy = iter->isBoundary(index); if (index < begin && dummy == TRUE) { errln((UnicodeString)"Didn't handle preceeding correctly with offset = " + index + " and begin index = " + begin); } } delete iter; } void IntlTestTextBoundary::doMultipleSelectionTest(BreakIterator& iterator, UnicodeString& testText) { iterator.setText(&testText); BreakIterator* testIterator = iterator.clone(); int32_t offset = iterator.first(); int32_t testOffset; int32_t count = 0; logln("doMultipleSelectionTest text of length: "+testText.size()); if (*testIterator != iterator) errln("clone() or operator!= failed: two clones compared unequal"); do { testOffset = testIterator->first(); testOffset = testIterator->next(count); if (offset != testOffset) errln(UnicodeString("next(n) and next() not returning consistent results: for step ") + count + ", next(n) returned " + 
testOffset + " and next() had " + offset); if (offset != BreakIterator::DONE) { count++; offset = iterator.next(); if (offset != BreakIterator::DONE && *testIterator == iterator) errln("operator== failed: Two unequal iterators compared equal."); } } while (offset != BreakIterator::DONE); // now do it backwards... offset = iterator.last(); count = 0; do { testOffset = testIterator->last(); testOffset = testIterator->next(count); if (offset != testOffset) errln(UnicodeString("next(n) and next() not returning consistent results: for step ") + count + ", next(n) returned " + testOffset + " and next() had " + offset); if (offset != BreakIterator::DONE) { count--; offset = iterator.previous(); } } while (offset != BreakIterator::DONE); delete testIterator; } void IntlTestTextBoundary::doBreakInvariantTest(BreakIterator& tb, UnicodeString& testChars) { UnicodeString work("aaa"); int errorCount = 0; // a break should always occur after CR (unless followed by LF), LF, PS, and LS UnicodeString breaks = CharsToUnicodeString("\r\n\\u2029\\u2028"); UTextOffset i, j; for (i = 0; i < breaks.size(); i++) { work[1] = breaks[i]; for (j = 0; j < testChars.size(); j++) { work[0] = testChars[j]; for (int k = 0; k < testChars.size(); k++) { UChar c = testChars[k]; // if a cr is followed by lf, ps, ls or etx, don't do the check (that's // not supposed to work) if (work[1] == '\r' && (c == '\n' || c == 0x2029 || c == 0x2028 || c == 0x0003)) continue; work[2] = testChars[k]; tb.setText(&work); bool_t seen2 = FALSE; for (int l = tb.first(); l != BreakIterator::DONE; l = tb.next()) { if (l == 2) seen2 = TRUE; } if (!seen2) { errln("No break between U+" + UCharToUnicodeString(work[1]) + " and U+" + UCharToUnicodeString(work[2])); errorCount++; if (errorCount >= 75) return; } } } } } void IntlTestTextBoundary::doOtherInvariantTest(BreakIterator& tb, UnicodeString& testChars) { UnicodeString work("a\r\na"); int32_t errorCount = 0; UTextOffset i, j; // a break should never occur between CR and 
LF for (i = 0; i < testChars.size(); i++) { work[0] = testChars[i]; for (j = 0; j < testChars.size(); j++) { work[3] = testChars[j]; tb.setText(&work); for (int32_t k = tb.first(); k != BreakIterator::DONE; k = tb.next()) if (k == 2) { errln("Break between CR and LF in string U+" + UCharToUnicodeString(work[0]) + ", U+d U+a U+" + UCharToUnicodeString(work[3])); errorCount++; if (errorCount >= 75) return; } } } // a break should never occur before a non-spacing mark, unless the preceding // character is CR, LF, PS, or LS work.remove(); work += "aaaa"; for (i = 0; i < testChars.size(); i++) { UChar c = testChars[i]; if (c == '\n' || c == '\r' || c == 0x2029 || c == 0x2028 || c == 0x0003) continue; work[1] = c; for (j = 0; j < testChars.size(); j++) { c = testChars[j]; if ((Unicode::getType(c) != Unicode::NON_SPACING_MARK) && (Unicode::getType(c) != Unicode::ENCLOSING_MARK)) continue; work[2] = c; tb.setText(&work); for (int k = tb.first(); k != BreakIterator::DONE; k = tb.next()) if (k == 2) { errln("Break between U+" + UCharToUnicodeString(work[1]) + " and U+" + UCharToUnicodeString(work[2])); errorCount++; if (errorCount >= 75) return; } } } } void IntlTestTextBoundary::sample(BreakIterator& tb, UnicodeString& text, UnicodeString& title) { UnicodeString substring; bool_t verboseWas = verbose; verbose = TRUE; logln("-------------------------"+title+" length = "+text.size()); tb.setText(&text); int32_t start = tb.first(); int32_t end; for (end = tb.next(); end != BreakIterator::DONE; end = tb.next()) { text.extractBetween(start, end, substring); logln(UnicodeString("[")+start+","+end+"] \""+substring+"\""); start = end; } verbose = verboseWas; } void IntlTestTextBoundary::TestPreceding() { UnicodeString words3("aaa bbb ccc"); BreakIterator* e = BreakIterator::createWordInstance(); e->setText( &words3 ); e->first(); UTextOffset p1 = e->next(); UTextOffset p2 = e->next(); UTextOffset p3 = e->next(); UTextOffset p4 = e->next(); UTextOffset f = e->following( p2+1 ); 
UTextOffset p = e->preceding( p2+1 ); if (f!=p3) errln("IntlTestTextBoundary::TestPreceding: f!=p3"); if (p!=p2) errln("IntlTestTextBoundary::TestPreceding: p!=p2"); if (!e->isBoundary(p2) || e->isBoundary(p2+1) || !e->isBoundary(p3)) { errln("IntlTestTextBoundary::TestPreceding: isBoundary err"); } delete e; }