home *** CD-ROM | disk | FTP | other *** search
/ AmigActive 6 / AACD06.ISO / AACD / Programming / ICU / src / icu / source / test / intltest / ittxtbd.cpp < prev    next >
Encoding:
C/C++ Source or Header  |  1999-10-19  |  62.0 KB  |  1,536 lines

  1. /*
  2. *****************************************************************************************
  3. *                                                                                       *
  4. * COPYRIGHT:                                                                            *
  5. *   (C) Copyright Taligent, Inc.,  1997                                                 *
  6. *   (C) Copyright International Business Machines Corporation,  1997-1998               *
  7. *   Licensed Material - Program-Property of IBM - All Rights Reserved.                  *
  8. *   US Government Users Restricted Rights - Use, duplication, or disclosure             *
  9. *   restricted by GSA ADP Schedule Contract with IBM Corp.                              *
  10. *                                                                                       *
  11. *****************************************************************************************
  12. */
  13.  
  14. #include "intltest.h"
  15. #include "brkiter.h"
  16. #include "unicode.h"
  17. #include <stdio.h>
  18. //#include "txbdapi.h"    // BreakIteratorAPIC
  19.  
  20. //--------------------------------------------------------------------------------------
  21. /**
  22.  * "Vector" class for holding test tables
  23.  * (this class is actually a linked list, but we use the name and API of the
  24.  * java.util.Vector class to keep as much of our test code as possible the same.)
  25.  */
  26. class Enumeration { // text enumeration
  27. public:
  28.   virtual bool_t hasMoreElements() = 0;
  29.   virtual UnicodeString nextElement() = 0;
  30. };
  31.  
  32. class Vector { // text vector
  33. public:
  34.  
  35.   class TextLink {
  36.   public:
  37.     TextLink() : fLink(0), fText() {}
  38.     TextLink(TextLink* link, UnicodeString text) : fLink(link), fText(text) {}
  39.  
  40.     TextLink* fLink;
  41.     UnicodeString fText;
  42.   };
  43.  
  44. public:
  45.   TextLink fBase;
  46.   TextLink* fEnd;
  47.   int32_t fSize;
  48.  
  49. public:
  50.   class VectorEnumeration : public Enumeration {
  51.   public:
  52.     VectorEnumeration(Vector* vector) : fVector(vector), fPos(&vector->fBase) {}
  53.     
  54.     bool_t hasMoreElements() { return fPos->fLink != &fVector->fBase; }
  55.     UnicodeString nextElement() { fPos = fPos->fLink; return fPos->fText; }
  56.  
  57.     Vector* fVector;
  58.     TextLink* fPos;
  59.   };
  60.  
  61.   Vector() : fBase(), fEnd(&fBase), fSize(0) { fBase.fLink = &fBase; }
  62.  
  63.   ~Vector() { 
  64.     while (fBase.fLink != &fBase) { 
  65.         TextLink* link = fBase.fLink;
  66.         fBase.fLink = link->fLink;
  67.         delete link;
  68.         }
  69.   }
  70.  
  71.   void addElement(UnicodeString text) { fEnd->fLink = new TextLink(&fBase, text); fEnd = fEnd->fLink; ++fSize; }
  72.  
  73.   UnicodeString elementAt(int32_t pos) {
  74.         if (pos >= fSize)
  75.           return UnicodeString();
  76.  
  77.     TextLink* link = fBase.fLink; 
  78.     while (pos-- > 0) link = link->fLink; 
  79.     return link->fText;
  80.   }
  81.   UnicodeString lastElement() { return fEnd == &fBase ? UnicodeString() : fEnd->fText; }
  82.   int32_t size() { return fSize; }
  83.  
  84.   Enumeration* elements() { return new VectorEnumeration(this); }
  85.  
  86. };
  87.  
  88. //--------------------------------------------------------------------------------------
  89. /**
  90.  * IntlTestTextBoundary is medium top level test class for everything in the directory "findword".
  91.  */
  92.  
  93. #include "utypes.h"
  94. #include "ittxtbd.h"
  95.  
  96. #include <string.h>
  97. #include "schriter.h"
  98.  
  99. // [HSYS] Just to make it easier to use with UChar array.
  100. UnicodeString CharsToUnicodeString(const char* chars)
  101. {
  102.     int len = strlen(chars);
  103.     int i;
  104.     UnicodeString buffer;
  105.     for (i = 0; i < len;) {
  106.         if ((chars[i] == '\\') && (i+1 < len) && (chars[i+1] == 'u')) {
  107.             int unicode;
  108.             sscanf(&(chars[i+2]), "%4X", &unicode);
  109.             buffer += (UChar)unicode;
  110.             i += 6;
  111.         } else {
  112.             buffer += (UChar)chars[i++];
  113.         }
  114.     }
  115.     return buffer;
  116. }
  117.  
  118.  
  119. const UChar IntlTestTextBoundary::cannedTestArray[] = {
  120.     0x0001, 0x0002, 0x0003, 0x0004, ' ', '!', '\\', '"', '#', '$', '%', '&', '(', ')', '+', '-', '0', '1', 
  121.     '2', '3', '4', '<', '=', '>', 'A', 'B', 'C', 'D', 'E', '[', ']', '^', '_', '`', 'a', 'b', 'c', 'd', 'e', '{', 
  122.     '}', '|', ',',  0x00a0, 0x00a2,
  123.     0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00a9, 0x00ab, 0x00ad, 0x00ae, 0x00af, 0x00b0, 0x00b2, 0x00b3, 
  124.     0x00b4, 0x00b9, 0x00bb, 0x00bc, 0x00bd, 0x02b0, 0x02b1, 0x02b2, 0x02b3, 0x02b4, 0x0300, 0x0301, 0x0302, 0x0303,
  125.     0x0304, 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x0903, 0x093e, 0x093f, 0x0940, 0x0949, 0x0f3a, 0x0f3b, 0x2000,
  126.     0x2001, 0x2002, 0x200c, 0x200d, 0x200e, 0x200f, 0x2010, 0x2011, 0x2012, 0x2028, 0x2029, 0x202a, 0x203e, 0x203f,
  127.     0x2040, 0x20dd, 0x20de, 0x20df, 0x20e0, 0x2160, 0x2161, 0x2162, 0x2163, 0x2164, 0x0000
  128. };
  129.  
  130. UnicodeString* IntlTestTextBoundary::cannedTestChars = 0;
  131.  
  132. //---------------------------------------------
  133. // setup methods
  134. //---------------------------------------------
  135.  
  136. IntlTestTextBoundary::IntlTestTextBoundary()
  137. {
  138.     UnicodeString temp(cannedTestArray);
  139.     cannedTestChars = new UnicodeString();
  140.     *cannedTestChars += 0x0000;
  141.     *cannedTestChars += temp;
  142.     addTestWordData();
  143.     addTestSentenceData();
  144.     addTestLineData();
  145.     addTestCharacterData();
  146. }
  147.  
  148. IntlTestTextBoundary::~IntlTestTextBoundary()
  149. {
  150.     delete wordSelectionData;
  151.     delete sentenceSelectionData;
  152.     delete lineSelectionData;
  153.     delete characterSelectionData;
  154.     delete cannedTestChars;
  155. }
  156.  
  157. /**
  158.  * @bug 4097779 4098467 4117554
  159.  */
  160. void IntlTestTextBoundary::addTestWordData()
  161. {
  162.     wordSelectionData = new Vector();
  163.  
  164.     wordSelectionData->addElement("12,34");
  165.  
  166.     wordSelectionData->addElement(" ");
  167.     wordSelectionData->addElement(UCharToUnicodeString((UChar)(0x00A2)));   //cent sign
  168.     wordSelectionData->addElement(UCharToUnicodeString((UChar)(0x00A3)));   //pound sign
  169.     wordSelectionData->addElement(UCharToUnicodeString((UChar)(0x00A4)));   //currency sign
  170.     wordSelectionData->addElement(UCharToUnicodeString((UChar)(0x00A5)));   //yen sign
  171.     wordSelectionData->addElement("alpha-beta-gamma");
  172.     wordSelectionData->addElement(".");
  173.     wordSelectionData->addElement(" ");
  174.     wordSelectionData->addElement("Badges");
  175.     wordSelectionData->addElement("?");
  176.     wordSelectionData->addElement(" ");
  177.     wordSelectionData->addElement("BADGES");
  178.     wordSelectionData->addElement("!");
  179.     wordSelectionData->addElement("?");
  180.     wordSelectionData->addElement("!");
  181.     wordSelectionData->addElement(" ");
  182.     wordSelectionData->addElement("We");
  183.     wordSelectionData->addElement(" ");
  184.     wordSelectionData->addElement("don't");
  185.     wordSelectionData->addElement(" ");
  186.     wordSelectionData->addElement("need");
  187.     wordSelectionData->addElement(" ");
  188.     wordSelectionData->addElement("no");
  189.     wordSelectionData->addElement(" ");
  190.     wordSelectionData->addElement("STINKING");
  191.     wordSelectionData->addElement(" ");
  192.     wordSelectionData->addElement("BADGES");
  193.     wordSelectionData->addElement("!");
  194.     wordSelectionData->addElement("!");
  195.     wordSelectionData->addElement("!");
  196.  
  197.     wordSelectionData->addElement("012.566,5");
  198.     wordSelectionData->addElement(" ");
  199.     wordSelectionData->addElement("123.3434,900");
  200.     wordSelectionData->addElement(" ");
  201.     wordSelectionData->addElement("1000,233,456.000");
  202.     wordSelectionData->addElement(" ");
  203.     wordSelectionData->addElement("1,23.322%");
  204.     wordSelectionData->addElement(" ");
  205.     wordSelectionData->addElement("123.1222");
  206.  
  207.     wordSelectionData->addElement(" ");
  208.     wordSelectionData->addElement("$123,000.20");
  209.  
  210.     wordSelectionData->addElement(" ");
  211.     wordSelectionData->addElement("179.01%");
  212.  
  213.     wordSelectionData->addElement("Hello");
  214.     wordSelectionData->addElement(",");
  215.     wordSelectionData->addElement(" ");
  216.     wordSelectionData->addElement("how");
  217.     wordSelectionData->addElement(" ");
  218.     wordSelectionData->addElement("are");
  219.     wordSelectionData->addElement(" ");
  220.     wordSelectionData->addElement("you");
  221.     wordSelectionData->addElement(" ");
  222.     wordSelectionData->addElement("X");
  223.     wordSelectionData->addElement(" ");
  224.  
  225.     wordSelectionData->addElement("Now");
  226.     wordSelectionData->addElement("\r");
  227.     wordSelectionData->addElement("is");
  228.     wordSelectionData->addElement("\n");
  229.     wordSelectionData->addElement("the");
  230.     wordSelectionData->addElement("\r\n");
  231.     wordSelectionData->addElement("time");
  232.     wordSelectionData->addElement("\n");
  233.     wordSelectionData->addElement("\r");
  234.     wordSelectionData->addElement("for");
  235.     wordSelectionData->addElement("\r");
  236.     wordSelectionData->addElement("\r");
  237.     wordSelectionData->addElement("all");
  238.     wordSelectionData->addElement(" ");
  239.  
  240.     // to test for bug #4097779
  241.     wordSelectionData->addElement(CharsToUnicodeString("aa\\u0300a"));
  242.     wordSelectionData->addElement(" ");
  243.  
  244.     // to test for bug #4098467
  245.     // What follows is a string of Korean characters (I found it in the Yellow Pages
  246.     // ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed
  247.     // it correctly), first as precomposed syllables, and then as conjoining jamo.
  248.     // Both sequences should be semantically identical and break the same way.
  249.     // precomposed syllables...
  250.     wordSelectionData->addElement(CharsToUnicodeString("\\uc0c1\\ud56d"));
  251.     wordSelectionData->addElement(" ");
  252.     wordSelectionData->addElement(CharsToUnicodeString("\\ud55c\\uc778"));
  253.     wordSelectionData->addElement(" ");
  254.     wordSelectionData->addElement(CharsToUnicodeString("\\uc5f0\\ud569"));
  255.     wordSelectionData->addElement(" ");
  256.     wordSelectionData->addElement(CharsToUnicodeString("\\uc7a5\\ub85c\\uad50\\ud68c"));
  257.     wordSelectionData->addElement(" ");
  258.     // conjoining jamo...
  259.     wordSelectionData->addElement(CharsToUnicodeString("\\u1109\\u1161\\u11bc\\u1112\\u1161\\u11bc"));
  260.     wordSelectionData->addElement(" ");
  261.     wordSelectionData->addElement(CharsToUnicodeString("\\u1112\\u1161\\u11ab\\u110b\\u1175\\u11ab"));
  262.     wordSelectionData->addElement(" ");
  263.     wordSelectionData->addElement(CharsToUnicodeString("\\u110b\\u1167\\u11ab\\u1112\\u1161\\u11b8"));
  264.     wordSelectionData->addElement(" ");
  265.     wordSelectionData->addElement(CharsToUnicodeString("\\u110c\\u1161\\u11bc\\u1105\\u1169\\u1100\\u116d\\u1112\\u116c"));
  266.     wordSelectionData->addElement(" ");
  267.  
  268.     // this is a test for bug #4117554: the ideographic iteration mark (U+3005) should
  269.     // count as a Kanji character for the purposes of word breaking
  270.     wordSelectionData->addElement("abc");
  271.     wordSelectionData->addElement(CharsToUnicodeString("\\u4e01\\u4e02\\u3005\\u4e03\\u4e03"));
  272.     wordSelectionData->addElement("abc");
  273.  
  274.     Enumeration *elems = wordSelectionData->elements();
  275.     testWordText = createTestData(elems);
  276.     delete elems;
  277. }
  278.  
  279. const UChar kParagraphSeparator = 0x2029;
  280. const UChar kLineSeparator = 0x2028;
  281.  
  282. /**
  283.  * @bug 4111338 4117554 4113835
  284.  */
  285. void IntlTestTextBoundary::addTestSentenceData()
  286. {
  287.     sentenceSelectionData = new Vector();
  288.     sentenceSelectionData->addElement("This is a simple sample sentence. ");
  289.     sentenceSelectionData->addElement("(This is it.) ");
  290.     sentenceSelectionData->addElement("This is a simple sample sentence. ");
  291.     sentenceSelectionData->addElement("\"This isn\'t it.\" ");
  292.     sentenceSelectionData->addElement("Hi! ");
  293.     sentenceSelectionData->addElement("This is a simple sample sentence. ");
  294.     sentenceSelectionData->addElement("It does not have to make any sense as you can see. ");
  295.     sentenceSelectionData->addElement("Nel mezzo del cammin di nostra vita, mi ritrovai in una selva oscura. ");
  296.     sentenceSelectionData->addElement("Che la dritta via aveo smarrita. ");
  297.     sentenceSelectionData->addElement("He said, that I said, that you said!! ");
  298.  
  299.     sentenceSelectionData->addElement("Don't rock the boat." + UCharToUnicodeString(kParagraphSeparator));
  300.  
  301.     sentenceSelectionData->addElement("Because I am the daddy, that is why. ");
  302.     sentenceSelectionData->addElement("Not on my time (el timo.)! ");
  303.  
  304.     sentenceSelectionData->addElement("So what!!" + UCharToUnicodeString(kParagraphSeparator));
  305.  
  306.     sentenceSelectionData->addElement("\"But now,\" he said, \"I know!\" ");
  307.     sentenceSelectionData->addElement("Harris thumbed down several, including \"Away We Go\" (which became the huge success Oklahoma!). ");
  308.     sentenceSelectionData->addElement("One species, B. anthracis, is highly virulent.\n");
  309.     sentenceSelectionData->addElement("Wolf said about Sounder:\"Beautifully thought-out and directed.\" ");
  310.     sentenceSelectionData->addElement("Have you ever said, \"This is where\tI shall live\"? ");
  311.     sentenceSelectionData->addElement("He answered, \"You may not!\" ");
  312.     sentenceSelectionData->addElement("Another popular saying is: \"How do you do?\". ");
  313.     sentenceSelectionData->addElement("Yet another popular saying is: \'I\'m fine thanks.\' ");
  314.     sentenceSelectionData->addElement("What is the proper use of the abbreviation pp.? ");
  315.     sentenceSelectionData->addElement("Yes, I am definatelly 12\" tall!!");
  316.  
  317.     // test for bug #4113835: \n and \r count as spaces, not as paragraph breaks
  318.     sentenceSelectionData->addElement(CharsToUnicodeString("Now\ris\nthe\r\ntime\n\rfor\r\rall\\u2029"));
  319.  
  320.     // test for bug #4111338: Don't break sentences at the boundary between CJK
  321.     // and other letters
  322.     sentenceSelectionData->addElement(CharsToUnicodeString("\\u5487\\u67ff\\ue591\\u5017\\u61b3\\u60a1\\u9510\\u8165:\"JAVA\\u821c")
  323.         + CharsToUnicodeString("\\u8165\\u7fc8\\u51ce\\u306d,\\u2494\\u56d8\\u4ec0\\u60b1\\u8560\\u51ba")
  324.         + CharsToUnicodeString("\\u611d\\u57b6\\u2510\\u5d46\".\\u2029"));
  325.     sentenceSelectionData->addElement(CharsToUnicodeString("\\u5487\\u67ff\\ue591\\u5017\\u61b3\\u60a1\\u9510\\u8165\\u9de8")
  326.         + CharsToUnicodeString("\\u97e4JAVA\\u821c\\u8165\\u7fc8\\u51ce\\u306d\\ue30b\\u2494\\u56d8\\u4ec0")
  327.         + CharsToUnicodeString("\\u60b1\\u8560\\u51ba\\u611d\\u57b6\\u2510\\u5d46\\u97e5\\u7751\\u2029"));
  328.     sentenceSelectionData->addElement(CharsToUnicodeString("\\u5487\\u67ff\\ue591\\u5017\\u61b3\\u60a1\\u9510\\u8165\\u9de8\\u97e4")
  329.         + CharsToUnicodeString("\\u6470\\u8790JAVA\\u821c\\u8165\\u7fc8\\u51ce\\u306d\\ue30b\\u2494\\u56d8")
  330.         + CharsToUnicodeString("\\u4ec0\\u60b1\\u8560\\u51ba\\u611d\\u57b6\\u2510\\u5d46\\u97e5\\u7751\\u2029"));
  331.     sentenceSelectionData->addElement(CharsToUnicodeString("He said, \"I can go there.\"\\u2029"));
  332.  
  333.     // test for bug #4117554: Treat fullwidth variants of .!? the same as their
  334.     // normal counterparts
  335.     sentenceSelectionData->addElement(CharsToUnicodeString("I know I'm right\\uff0e "));
  336.     sentenceSelectionData->addElement(CharsToUnicodeString("Right\\uff1f "));
  337.     sentenceSelectionData->addElement(CharsToUnicodeString("Right\\uff01 "));
  338.  
  339.     // test for bug #4117554: Don't break sentences at boundary between CJK and digits
  340.     sentenceSelectionData->addElement(CharsToUnicodeString("\\u5487\\u67ff\\ue591\\u5017\\u61b3\\u60a1\\u9510\\u8165\\u9de8")
  341.         + CharsToUnicodeString("\\u97e48888\\u821c\\u8165\\u7fc8\\u51ce\\u306d\\ue30b\\u2494\\u56d8\\u4ec0")
  342.         + CharsToUnicodeString("\\u60b1\\u8560\\u51ba\\u611d\\u57b6\\u2510\\u5d46\\u97e5\\u7751\\u2029"));
  343.  
  344.     // test for bug #4117554: Break sentence between a sentence terminator and
  345.     // opening punctuation
  346.     sentenceSelectionData->addElement("no?");
  347.     sentenceSelectionData->addElement("(yes)" + CharsToUnicodeString("\\u2029"));
  348.  
  349.     // test for bug #4158381: Don't break sentence after period if it isn't
  350.     // followed by a space
  351.     sentenceSelectionData->addElement("Test <code>Flags.Flag</code> class.  ");
  352.     sentenceSelectionData->addElement("Another test." + CharsToUnicodeString("\\u2029"));
  353.  
  354.     // test for bug #4158381: No breaks when there are no terminators around
  355.     sentenceSelectionData->addElement("<P>Provides a set of "lightweight" (all-java<FONT SIZE=\"-2\"><SUP>TM</SUP></FONT> language) components that, to the maximum degree possible, work the same on all platforms.  ");
  356.     sentenceSelectionData->addElement("Another test." + CharsToUnicodeString("\\u2029"));
  357.  
  358.     // test for bug #4143071: Make sure sentences that end with digits
  359.     // work right
  360.     sentenceSelectionData->addElement("Today is the 27th of May, 1998.  ");
  361.     sentenceSelectionData->addElement("Tomorrow with be 28 May 1998.  ");
  362.     sentenceSelectionData->addElement("The day after will be the 30th." 
  363.                                         + CharsToUnicodeString("\\u2029"));
  364.  
  365.     // test for bug #4152416: Make sure sentences ending with a capital
  366.     // letter are treated correctly
  367.     sentenceSelectionData->addElement("The type of all primitive <code>boolean</code> values accessed in the target VM.  ");
  368.     sentenceSelectionData->addElement("Calls to xxx will return an implementor of this interface." + CharsToUnicodeString("\\u2029"));
  369.  
  370.     // test for bug #4152117: Make sure sentence breaking is handling
  371.     // punctuation correctly [COULD NOT REPRODUCE THIS BUG, BUT TEST IS
  372.     // HERE TO MAKE SURE IT DOESN'T CROP UP]
  373.     sentenceSelectionData->addElement("Constructs a randomly generated BigInteger, uniformly distributed over the range <tt>0</tt> to <tt>(2<sup>numBits</sup> - 1)</tt>, inclusive.  ");
  374.     sentenceSelectionData->addElement("The uniformity of the distribution assumes that a fair source of random bits is provided in <tt>rnd</tt>.  ");
  375.     sentenceSelectionData->addElement("Note that this constructor always constructs a non-negative BigInteger." + CharsToUnicodeString("\\u2029"));
  376.  
  377.     Enumeration *elems = sentenceSelectionData->elements();
  378.     testSentenceText = createTestData(elems);
  379.     delete elems;
  380. }
  381.  
  382. /**
  383.  * @bug 4068133 4086052 4035266 4097920 4098467 4117554
  384.  */
  385. void IntlTestTextBoundary::addTestLineData()
  386. {
  387.     lineSelectionData = new Vector();
  388.     lineSelectionData->addElement("Multi-");
  389.     lineSelectionData->addElement("Level ");
  390.     lineSelectionData->addElement("example ");
  391.     lineSelectionData->addElement("of ");
  392.     lineSelectionData->addElement("a ");
  393.     lineSelectionData->addElement("semi-");
  394.     lineSelectionData->addElement("idiotic ");
  395.     lineSelectionData->addElement("non-");
  396.     lineSelectionData->addElement("sensical ");
  397.     lineSelectionData->addElement("(non-");
  398.     lineSelectionData->addElement("important) ");
  399.     lineSelectionData->addElement("sentence. ");
  400.  
  401.     lineSelectionData->addElement("Hi  ");
  402.     lineSelectionData->addElement("Hello ");
  403.     lineSelectionData->addElement("How\n");
  404.     lineSelectionData->addElement("are\r");
  405.     lineSelectionData->addElement("you" + UCharToUnicodeString(kLineSeparator));
  406.     lineSelectionData->addElement("fine.\t");
  407.     lineSelectionData->addElement("good.  ");
  408.  
  409.     lineSelectionData->addElement("Now\r");
  410.     lineSelectionData->addElement("is\n");
  411.     lineSelectionData->addElement("the\r\n");
  412.     lineSelectionData->addElement("time\n");
  413.     lineSelectionData->addElement("\r");
  414.     lineSelectionData->addElement("for\r");
  415.     lineSelectionData->addElement("\r");
  416.     lineSelectionData->addElement("all");
  417.  
  418.     // to test for bug #4068133
  419.     lineSelectionData->addElement(CharsToUnicodeString("\\u96f6"));
  420.     lineSelectionData->addElement(CharsToUnicodeString("\\u4e00\\u3002"));
  421.     lineSelectionData->addElement(CharsToUnicodeString("\\u4e8c\\u3001"));
  422.     lineSelectionData->addElement(CharsToUnicodeString("\\u4e09\\u3002\\u3001"));
  423.     lineSelectionData->addElement(CharsToUnicodeString("\\u56db\\u3001\\u3002\\u3001"));
  424.     lineSelectionData->addElement(CharsToUnicodeString("\\u4e94,"));
  425.     lineSelectionData->addElement(CharsToUnicodeString("\\u516d."));
  426.     lineSelectionData->addElement(CharsToUnicodeString("\\u4e03.\\u3001,\\u3002"));
  427.     lineSelectionData->addElement(CharsToUnicodeString("\\u516b"));
  428.  
  429.     // to test for bug #4086052
  430.     lineSelectionData->addElement(CharsToUnicodeString("foo\\u00a0bar "));
  431. //        lineSelectionData->addElement("foo\\ufeffbar");
  432.  
  433.     // to test for bug #4097920
  434.     lineSelectionData->addElement("dog,");
  435.     lineSelectionData->addElement("cat,");
  436.     lineSelectionData->addElement("mouse ");
  437.     lineSelectionData->addElement("(one)");
  438.     lineSelectionData->addElement("(two)\n");
  439.  
  440.     // to test for bug #4035266
  441.     lineSelectionData->addElement("The ");
  442.     lineSelectionData->addElement("balance ");
  443.     lineSelectionData->addElement("is ");
  444.     lineSelectionData->addElement("$-23,456.78, ");
  445.     lineSelectionData->addElement("not ");
  446.     lineSelectionData->addElement("-$32,456.78!\n");
  447.  
  448.     // to test for bug #4098467
  449.     // What follows is a string of Korean characters (I found it in the Yellow Pages
  450.     // ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed
  451.     // it correctly), first as precomposed syllables, and then as conjoining jamo.
  452.     // Both sequences should be semantically identical and break the same way.
  453.     // precomposed syllables...
  454.     lineSelectionData->addElement(CharsToUnicodeString("\\uc0c1\\ud56d "));
  455.     lineSelectionData->addElement(CharsToUnicodeString("\\ud55c\\uc778 "));
  456.     lineSelectionData->addElement(CharsToUnicodeString("\\uc5f0\\ud569 "));
  457.     lineSelectionData->addElement(CharsToUnicodeString("\\uc7a5\\ub85c\\uad50\\ud68c "));
  458.     // conjoining jamo...
  459.     lineSelectionData->addElement(CharsToUnicodeString("\\u1109\\u1161\\u11bc\\u1112\\u1161\\u11bc "));
  460.     lineSelectionData->addElement(CharsToUnicodeString("\\u1112\\u1161\\u11ab\\u110b\\u1175\\u11ab "));
  461.     lineSelectionData->addElement(CharsToUnicodeString("\\u110b\\u1167\\u11ab\\u1112\\u1161\\u11b8 "));
  462.     lineSelectionData->addElement(CharsToUnicodeString("\\u110c\\u1161\\u11bc\\u1105\\u1169\\u1100\\u116d\\u1112\\u116c"));
  463.  
  464.     // to test for bug #4117554: Fullwidth .!? should be treated as postJwrd
  465.     lineSelectionData->addElement(CharsToUnicodeString("\\u4e01\\uff0e"));
  466.     lineSelectionData->addElement(CharsToUnicodeString("\\u4e02\\uff01"));
  467.     lineSelectionData->addElement(CharsToUnicodeString("\\u4e03\\uff1f"));
  468.  
  469.     Enumeration *elems = lineSelectionData->elements();
  470.     testLineText = createTestData(elems);
  471.     delete elems;
  472. }
  473.  
  474. /*
  475. const UnicodeString graveS = "S" + (UChar)0x0300;
  476. const UnicodeString acuteBelowI = "i" + UCharToUnicodeString(0x0317);
  477. const UnicodeString acuteE = "e" + UCharToUnicodeString(0x0301);
  478. const UnicodeString circumflexA = "a" + UCharToUnicodeString(0x0302);
  479. const UnicodeString tildeE = "e" + UCharToUnicodeString(0x0303);
  480. */
  481.  
  482. /**
  483.  * @bug 4098467
  484.  */
  485. void IntlTestTextBoundary::addTestCharacterData()
  486. {
  487.     characterSelectionData = new Vector();
  488.     characterSelectionData->addElement("S" + UCharToUnicodeString(0x0300)); //graveS
  489.     characterSelectionData->addElement("i" + UCharToUnicodeString(0x0301)); // acuteBelowI
  490.     characterSelectionData->addElement("m");
  491.     characterSelectionData->addElement("p");
  492.     characterSelectionData->addElement("l");
  493.     characterSelectionData->addElement("e" + UCharToUnicodeString(0x0301));  // acuteE
  494.     characterSelectionData->addElement(" ");
  495.     characterSelectionData->addElement("s");
  496.     characterSelectionData->addElement("a" + UCharToUnicodeString(0x0302));  // circumflexA
  497.     characterSelectionData->addElement("m");
  498.     characterSelectionData->addElement("p");
  499.     characterSelectionData->addElement("l");
  500.     characterSelectionData->addElement("e" + UCharToUnicodeString(0x0303));  // tildeE
  501.     characterSelectionData->addElement(".");
  502.     characterSelectionData->addElement("w");
  503.     characterSelectionData->addElement("a" + UCharToUnicodeString(0x0302));  // circumflexA
  504.     characterSelectionData->addElement("w");
  505.     characterSelectionData->addElement("a");
  506.     characterSelectionData->addElement("f");
  507.     characterSelectionData->addElement("q");
  508.     characterSelectionData->addElement("\n");
  509.     characterSelectionData->addElement("\r");
  510.     characterSelectionData->addElement("\r\n");
  511.     characterSelectionData->addElement("\n");
  512.  
  513.     // to test for bug #4098467
  514.     // What follows is a string of Korean characters (I found it in the Yellow Pages
  515.     // ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed
  516.     // it correctly), first as precomposed syllables, and then as conjoining jamo.
  517.     // Both sequences should be semantically identical and break the same way.
  518.     // precomposed syllables...
  519.     characterSelectionData->addElement(CharsToUnicodeString("\\uc0c1"));
  520.     characterSelectionData->addElement(CharsToUnicodeString("\\ud56d"));
  521.     characterSelectionData->addElement(" ");
  522.     characterSelectionData->addElement(CharsToUnicodeString("\\ud55c"));
  523.     characterSelectionData->addElement(CharsToUnicodeString("\\uc778"));
  524.     characterSelectionData->addElement(" ");
  525.     characterSelectionData->addElement(CharsToUnicodeString("\\uc5f0"));
  526.     characterSelectionData->addElement(CharsToUnicodeString("\\ud569"));
  527.     characterSelectionData->addElement(" ");
  528.     characterSelectionData->addElement(CharsToUnicodeString("\\uc7a5"));
  529.     characterSelectionData->addElement(CharsToUnicodeString("\\ub85c"));
  530.     characterSelectionData->addElement(CharsToUnicodeString("\\uad50"));
  531.     characterSelectionData->addElement(CharsToUnicodeString("\\ud68c"));
  532.     characterSelectionData->addElement(" ");
  533.     // conjoining jamo...
  534.     characterSelectionData->addElement(CharsToUnicodeString("\\u1109\\u1161\\u11bc"));
  535.     characterSelectionData->addElement(CharsToUnicodeString("\\u1112\\u1161\\u11bc"));
  536.     characterSelectionData->addElement(" ");
  537.     characterSelectionData->addElement(CharsToUnicodeString("\\u1112\\u1161\\u11ab"));
  538.     characterSelectionData->addElement(CharsToUnicodeString("\\u110b\\u1175\\u11ab"));
  539.     characterSelectionData->addElement(" ");
  540.     characterSelectionData->addElement(CharsToUnicodeString("\\u110b\\u1167\\u11ab"));
  541.     characterSelectionData->addElement(CharsToUnicodeString("\\u1112\\u1161\\u11b8"));
  542.     characterSelectionData->addElement(" ");
  543.     characterSelectionData->addElement(CharsToUnicodeString("\\u110c\\u1161\\u11bc"));
  544.     characterSelectionData->addElement(CharsToUnicodeString("\\u1105\\u1169"));
  545.     characterSelectionData->addElement(CharsToUnicodeString("\\u1100\\u116d"));
  546.     characterSelectionData->addElement(CharsToUnicodeString("\\u1112\\u116c"));
  547.  
  548.     Enumeration *elems = characterSelectionData->elements();
  549.     testCharacterText = createTestData(elems);
  550.     delete elems;
  551. }
  552.  
  553. UnicodeString IntlTestTextBoundary::createTestData(Enumeration* e)
  554. {
  555.   UnicodeString result = "";
  556.  
  557.   while (e->hasMoreElements()) {
  558.     result += e->nextElement();
  559.   }
  560.  
  561.   return result;
  562. }
  563.  
  564. //---------------------------------------------
  565. // SentenceBreak tests
  566. //---------------------------------------------
  567.  
  568. void IntlTestTextBoundary::TestForwardSentenceSelection()
  569. {
  570.     BreakIterator* e = BreakIterator::createSentenceInstance();
  571.     doForwardSelectionTest(*e, testSentenceText, sentenceSelectionData);
  572.     delete e;
  573. }
  574.  
  575. void IntlTestTextBoundary::TestFirstSentenceSelection()
  576. {
  577.     BreakIterator* e = BreakIterator::createSentenceInstance();
  578.     doFirstSelectionTest(*e, testSentenceText, sentenceSelectionData);
  579.     delete e;
  580. }
  581.  
  582. void IntlTestTextBoundary::TestLastSentenceSelection()
  583. {
  584.     BreakIterator* e = BreakIterator::createSentenceInstance();
  585.     doLastSelectionTest(*e, testSentenceText, sentenceSelectionData);
  586.     delete e;
  587. }
  588.  
  589. void IntlTestTextBoundary::TestBackwardSentenceSelection()
  590. {
  591.     BreakIterator* e = BreakIterator::createSentenceInstance();
  592.     doBackwardSelectionTest(*e, testSentenceText, sentenceSelectionData);
  593.     delete e;
  594. }
  595.  
  596. void IntlTestTextBoundary::TestForwardSentenceIndexSelection()
  597. {
  598.     BreakIterator* e = BreakIterator::createSentenceInstance();
  599.     doForwardIndexSelectionTest(*e, testSentenceText, sentenceSelectionData);
  600.     delete e;
  601. }
  602.  
  603. void IntlTestTextBoundary::TestBackwardSentenceIndexSelection()
  604. {
  605.     BreakIterator* e = BreakIterator::createSentenceInstance();
  606.     doBackwardIndexSelectionTest(*e, testSentenceText, sentenceSelectionData);
  607.     delete e;
  608. }
  609.  
  610. void IntlTestTextBoundary::TestSentenceMultipleSelection()
  611. {
  612.     BreakIterator* e = BreakIterator::createSentenceInstance();
  613.     doMultipleSelectionTest(*e, testSentenceText);
  614.     delete e;
  615. }
  616.  
  617. void IntlTestTextBoundary::TestSentenceInvariants()
  618. {
  619.     BreakIterator *e = BreakIterator::createSentenceInstance();
  620.     UnicodeString s = *cannedTestChars + CharsToUnicodeString(".,\\u3001\\u3002\\u3041\\u3042\\u3043\\ufeff");
  621.     doOtherInvariantTest(*e, s);
  622.     delete e;
  623. }
  624. //---------------------------------------------
  625. // WordBreak tests
  626. //---------------------------------------------
  627.  
  628. void IntlTestTextBoundary::TestForwardWordSelection()
  629. {
  630.     BreakIterator* e = BreakIterator::createWordInstance();
  631.     doForwardSelectionTest(*e, testWordText, wordSelectionData);
  632.     delete e;
  633. }
  634.  
  635. void IntlTestTextBoundary::TestFirstWordSelection()
  636. {
  637.     BreakIterator* e = BreakIterator::createWordInstance();
  638.     doFirstSelectionTest(*e, testWordText, wordSelectionData);
  639.     delete e;
  640. }
  641.  
  642. void IntlTestTextBoundary::TestLastWordSelection()
  643. {
  644.     BreakIterator* e = BreakIterator::createWordInstance();
  645.     doLastSelectionTest(*e, testWordText, wordSelectionData);
  646.     delete e;
  647. }
  648.  
  649. void IntlTestTextBoundary::TestBackwardWordSelection()
  650. {
  651.     BreakIterator* e = BreakIterator::createWordInstance();
  652.     doBackwardSelectionTest(*e, testWordText, wordSelectionData);
  653.     delete e;
  654. }
  655.  
  656. void IntlTestTextBoundary::TestForwardWordIndexSelection()
  657. {
  658.     BreakIterator* e = BreakIterator::createWordInstance();
  659.     doForwardIndexSelectionTest(*e, testWordText, wordSelectionData);
  660.     delete e;
  661. }
  662.  
  663. void IntlTestTextBoundary::TestBackwardWordIndexSelection()
  664. {
  665.     BreakIterator* e = BreakIterator::createWordInstance();
  666.     doBackwardIndexSelectionTest(*e, testWordText, wordSelectionData);
  667.     delete e;
  668. }
  669.  
  670. void IntlTestTextBoundary::TestWordMultipleSelection()
  671. {
  672.     BreakIterator* e = BreakIterator::createWordInstance();
  673.     doMultipleSelectionTest(*e, testWordText);
  674.     delete e;
  675. }
  676.  
  677. void IntlTestTextBoundary::TestWordInvariants()
  678. {
  679.     BreakIterator *e = BreakIterator::createWordInstance();
  680.     UnicodeString s = *cannedTestChars + CharsToUnicodeString("\',.\\u3041\\u3042\\u3043\\u309b\\u309c\\u30a1\\u30a2\\u30a3\\u4e00\\u4e01\\u4e02");
  681.     doBreakInvariantTest(*e, s);
  682.     s = *cannedTestChars + CharsToUnicodeString("\',.\\u3041\\u3042\\u3043\\u309b\\u309c\\u30a1\\u30a2\\u30a3\\u4e00\\u4e01\\u4e02");
  683.     doOtherInvariantTest(*e, s);
  684.     delete e;
  685. }
  686.  
  687. //---------------------------------------------
  688. // LineBreak tests
  689. //---------------------------------------------
  690.  
  691. void IntlTestTextBoundary::TestForwardLineSelection()
  692. {
  693.     BreakIterator* e = BreakIterator::createLineInstance();
  694.     doForwardSelectionTest(*e, testLineText, lineSelectionData);
  695.     delete e;
  696. }
  697.  
  698. void IntlTestTextBoundary::TestFirstLineSelection()
  699. {
  700.     BreakIterator* e = BreakIterator::createLineInstance();
  701.     doFirstSelectionTest(*e, testLineText, lineSelectionData);
  702.     delete e;
  703. }
  704.  
  705. void IntlTestTextBoundary::TestLastLineSelection()
  706. {
  707.     BreakIterator* e = BreakIterator::createLineInstance();
  708.     doLastSelectionTest(*e, testLineText, lineSelectionData);
  709.     delete e;
  710. }
  711.  
  712. void IntlTestTextBoundary::TestBackwardLineSelection()
  713. {
  714.     BreakIterator* e = BreakIterator::createLineInstance();
  715.     doBackwardSelectionTest(*e, testLineText, lineSelectionData);
  716.     delete e;
  717. }
  718.  
  719. void IntlTestTextBoundary::TestForwardLineIndexSelection()
  720. {
  721.     BreakIterator* e = BreakIterator::createLineInstance();
  722.     doForwardIndexSelectionTest(*e, testLineText, lineSelectionData);
  723.     delete e;
  724. }
  725.  
  726. void IntlTestTextBoundary::TestBackwardLineIndexSelection()
  727. {
  728.     BreakIterator* e = BreakIterator::createLineInstance();
  729.     doBackwardIndexSelectionTest(*e, testLineText, lineSelectionData);
  730.     delete e;
  731. }
  732.  
  733. void IntlTestTextBoundary::TestLineMultipleSelection()
  734. {
  735.     BreakIterator* e = BreakIterator::createLineInstance();
  736.     doMultipleSelectionTest(*e, testLineText);
  737.     delete e;
  738. }
  739.  
  740. void IntlTestTextBoundary::TestLineInvariants()
  741. {
  742.     BreakIterator *e = BreakIterator::createLineInstance();
  743.     UnicodeString s = CharsToUnicodeString(".,;:\\u3001\\u3002\\u3041\\u3042\\u3043\\u3044\\u3045\\u30a3\\u4e00\\u4e01\\u4e02");
  744.     UnicodeString testChars = *cannedTestChars + s;
  745.     doBreakInvariantTest(*e, testChars);
  746.     doOtherInvariantTest(*e, testChars);
  747.  
  748.     int errorCount = 0;
  749.     UTextOffset i, j, k;
  750.  
  751.     // in addition to the other invariants, a line-break iterator should make sure that:
  752.     // it doesn't break around the non-breaking characters
  753.     UnicodeString noBreak = CharsToUnicodeString("\\u00a0\\u2007\\u2011\\ufeff");
  754.     UnicodeString work("aaa");
  755.     for (i = 0; i < testChars.size(); i++) {
  756.         UChar c = testChars[i];
  757.         if (c == '\r' || c == '\n' || c == 0x2029 || c == 0x2028 || c == 0x0003)
  758.             continue;
  759.         work[0] = c;
  760.         for (j = 0; j < noBreak.size(); j++) {
  761.             work[1] = noBreak[j];
  762.             for (k = 0; k < testChars.size(); k++) {
  763.                 work[2] = testChars[k];
  764.                 e->setText(&work);
  765.                 for (int l = e->first(); l != BreakIterator::DONE; l = e->next())
  766.                     if (l == 1 || l == 2) {
  767.                         errln("Got break between U+" + UCharToUnicodeString(work[l - 1]) + 
  768.                             " and U+" + UCharToUnicodeString(work[l]));
  769.                         errorCount++;
  770.                         if (errorCount >= 75)
  771.                             return;
  772.                     }
  773.             }
  774.         }
  775.     }
  776.  
  777.     // it does break after hyphens (unless they're followed by a digit, a non-spacing mark,
  778.     // a currency symbol, a non-breaking space, or a line or paragraph separator)
  779.     UnicodeString dashes = CharsToUnicodeString("-\\u00ad\\u2010\\u2012\\u2013\\u2014");
  780.     for (i = 0; i < testChars.size(); i++) {
  781.         work[0] = testChars[i];
  782.         for (j = 0; j < dashes.size(); j++) {
  783.             work[1] = dashes[j];
  784.             for (k = 0; k < testChars.size(); k++) {
  785.                 UChar c = testChars[k];
  786.                 if (Unicode::getType(c) == Unicode::DECIMAL_DIGIT_NUMBER ||
  787.                     Unicode::getType(c) == Unicode::OTHER_NUMBER ||
  788.                     Unicode::getType(c) == Unicode::NON_SPACING_MARK ||
  789.                     Unicode::getType(c) == Unicode::ENCLOSING_MARK ||
  790.                     Unicode::getType(c) == Unicode::CURRENCY_SYMBOL ||
  791.                     c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029 ||
  792.                     c == 0x0003 || c == 0x00a0 || c == 0x2007 || c == 0x2011 ||
  793.                     c == 0xfeff)
  794.                     continue;
  795.                 work[2] = c;
  796.                 e->setText(&work);
  797.                 bool_t saw2 = FALSE;
  798.                 for (int l = e->first(); l != BreakIterator::DONE; l = e->next())
  799.                     if (l == 2)
  800.                         saw2 = TRUE;
  801.                 if (!saw2) {
  802.                     errln("Didn't get break between U+" + UCharToUnicodeString(work[1]) + 
  803.                         " and U+" + UCharToUnicodeString(work[2]));
  804.                     errorCount++;
  805.                     if (errorCount >= 75)
  806.                         return;
  807.                 }
  808.             }
  809.         }
  810.     }
  811.  
  812. }
  813.  
  814. //---------------------------------------------
  815. // CharacterBreak tests
  816. //---------------------------------------------
  817.  
  818. void IntlTestTextBoundary::TestForwardCharacterSelection()
  819. {
  820.     BreakIterator* e = BreakIterator::createCharacterInstance();
  821.     doForwardSelectionTest(*e, testCharacterText, characterSelectionData);
  822.     delete e;
  823. }
  824.  
  825. void IntlTestTextBoundary::TestFirstCharacterSelection()
  826. {
  827.     BreakIterator* e = BreakIterator::createCharacterInstance();
  828.     doFirstSelectionTest(*e, testCharacterText, characterSelectionData);
  829.     delete e;
  830. }
  831.  
  832. void IntlTestTextBoundary::TestLastCharacterSelection()
  833. {
  834.     BreakIterator* e = BreakIterator::createCharacterInstance();
  835.     doLastSelectionTest(*e, testCharacterText, characterSelectionData);
  836.     delete e;
  837. }
  838.  
  839. void IntlTestTextBoundary::TestBackwardCharacterSelection()
  840. {
  841.     BreakIterator* e = BreakIterator::createCharacterInstance();
  842.     doBackwardSelectionTest(*e, testCharacterText, characterSelectionData);
  843.     delete e;
  844. }
  845.  
  846. void IntlTestTextBoundary::TestForwardCharacterIndexSelection()
  847. {
  848.     BreakIterator* e = BreakIterator::createCharacterInstance();
  849.     doForwardIndexSelectionTest(*e, testCharacterText, characterSelectionData);
  850.     delete e;
  851. }
  852.  
  853. void IntlTestTextBoundary::TestBackwardCharacterIndexSelection()
  854. {
  855.     BreakIterator* e = BreakIterator::createCharacterInstance();
  856.     doBackwardIndexSelectionTest(*e, testCharacterText, characterSelectionData);
  857.     delete e;
  858. }
  859.  
  860. void IntlTestTextBoundary::TestCharacterMultipleSelection()
  861. {
  862.     BreakIterator* e = BreakIterator::createCharacterInstance();
  863.     doMultipleSelectionTest(*e, testCharacterText);
  864.     delete e;
  865. }
  866.  
  867. void IntlTestTextBoundary::TestCharacterInvariants()
  868. {
  869.     BreakIterator *e = BreakIterator::createCharacterInstance();
  870.     UnicodeString s = *cannedTestChars + CharsToUnicodeString("\\u1100\\u1101\\u1102\\u1160\\u1161\\u1162\\u11a8\\u11a9\\u11aa");
  871.     doBreakInvariantTest(*e, s);
  872.     s = *cannedTestChars + CharsToUnicodeString("\\u1100\\u1101\\u1102\\u1160\\u1161\\u1162\\u11a8\\u11a9\\u11aa");
  873.     doOtherInvariantTest(*e, s);
  874.     delete e;
  875. }
  876. //---------------------------------------------
  877. // other tests
  878. //---------------------------------------------
  879.  
  880. void IntlTestTextBoundary::TestEmptyString()
  881. {
  882.     UnicodeString text = "";
  883.     Vector x;
  884.     x.addElement(text);
  885.     BreakIterator* bi = BreakIterator::createLineInstance();
  886.     doForwardSelectionTest(*bi, text, &x);
  887.     doFirstSelectionTest(*bi, text, &x);
  888.     doLastSelectionTest(*bi, text, &x);
  889.     doBackwardSelectionTest(*bi, text, &x);
  890.     doForwardIndexSelectionTest(*bi, text, &x);
  891.     doBackwardIndexSelectionTest(*bi, text, &x);
  892.     delete bi;
  893. }
  894.  
  895. void IntlTestTextBoundary::TestGetAvailableLocales()
  896. {
  897.     int32_t locCount = 0;
  898.     const Locale* locList = BreakIterator::getAvailableLocales(locCount);
  899.  
  900.     if (locCount == 0)
  901.         errln("getAvailableLocales() returned an empty list!");
  902.     // I have no idea how to test this function...
  903. }
  904.  
  905. /**
  906.  * @bug 4095322
  907.  */
  908. void IntlTestTextBoundary::TestJapaneseLineBreak()
  909. {
  910.     UnicodeString testString = CharsToUnicodeString("\\u4e00x\\u4e8c");
  911.     UnicodeString precedingChars = CharsToUnicodeString("([{\\u00ab$\\u00a5\\u00a3\\u00a4\\u2018\\u201a\\u201c\\u201e\\u201b\\u201f");
  912.     UnicodeString followingChars = CharsToUnicodeString(")]}\\u00bb!%,.\\u3001\\u3002\\u3063\\u3083\\u3085\\u3087\\u30c3\\u30e3\\u30e5\\u30e7\\u30fc:;\\u309b\\u309c\\u3005\\u309d\\u309e\\u30fd\\u30fe\\u2019\\u201d\\u00b0\\u2032\\u2033\\u2034\\u2030\\u2031\\u2103\\u2109\\u00a2\\u0300\\u0301\\u0302");
  913.     BreakIterator *iter = BreakIterator::createLineInstance(Locale::JAPAN);
  914.     StringCharacterIterator* it = new StringCharacterIterator(testString);
  915.  
  916.     UTextOffset i;
  917.  
  918.     for (i = 0; i < precedingChars.size(); i++) {
  919.         testString[1] = precedingChars[i];
  920.         iter->adoptText(it);
  921.         int32_t j = iter->first();
  922.         if (j != 0)
  923.             errln("ja line break failure: failed to start at 0");
  924.         j = iter->next();
  925.         if (j != 1)
  926.             errln("ja line break failure: failed to stop before '" + UCharToUnicodeString(precedingChars[i])
  927.                         + "' (" + ((int)(precedingChars[i])) + ")");
  928.         j = iter->next();
  929.         if (j != 3)
  930.             errln("ja line break failure: failed to skip position after '" + UCharToUnicodeString(precedingChars[i])
  931.                         + "' (" + ((int)(precedingChars[i])) + ")");
  932.     }
  933.  
  934.     for (i = 0; i < followingChars.size(); i++) {
  935.         testString[1] = followingChars[i];
  936.         it = new StringCharacterIterator(testString);
  937.         iter->adoptText(it);
  938.         int j = iter->first();
  939.         if (j != 0)
  940.             errln("ja line break failure: failed to start at 0");
  941.         j = iter->next();
  942.         if (j != 2)
  943.             errln("ja line break failure: failed to skip position before '" + UCharToUnicodeString(followingChars[i])
  944.                         + "' (" + ((int)(followingChars[i])) + ")");
  945.         j = iter->next();
  946.         if (j != 3)
  947.             errln("ja line break failure: failed to stop after '" + UCharToUnicodeString(followingChars[i])
  948.                         + "' (" + ((int)(followingChars[i])) + ")");
  949.     }
  950.     delete iter;
  951. }
  952.  
  953.     // [serialization test has been removed pursuant to bug #4152965]
  954.  
  955. void IntlTestTextBoundary::TestGetDisplayName()
  956. {
  957.     UnicodeString   result;
  958.     
  959.     BreakIterator::getDisplayName(Locale::US, result);
  960.     if (Locale::getDefault() == Locale::US && result != "English (United States)")
  961.         errln("BreakIterator::getDisplayName() failed: expected \"English (United States)\", got \""
  962.                 + result);
  963.  
  964.     BreakIterator::getDisplayName(Locale::FRANCE, Locale::US, result);
  965.     if (result != "French (France)")
  966.         errln("BreakIterator::getDisplayName() failed: expected \"French (France)\", got \""
  967.                 + result);
  968. }
  969.  
  970. /**
  971.  * @bug 4068137
  972.  */
  973. void IntlTestTextBoundary::TestEndBehavior()
  974. {
  975.     UnicodeString testString("boo.");
  976.     BreakIterator *wb = BreakIterator::createWordInstance();
  977.     wb->setText(&testString);
  978.  
  979.     if (wb->first() != 0)
  980.         errln("Didn't get break at beginning of string.");
  981.     if (wb->next() != 3)
  982.         errln("Didn't get break before period in \"boo.\"");
  983.     if (wb->current() != 4 && wb->next() != 4)
  984.         errln("Didn't get break at end of string.");
  985.     delete wb;
  986. }
  987.  
  988. //---------------------------------------------
  989. // runIndexedTest
  990. //---------------------------------------------
  991.  
  992. void IntlTestTextBoundary::runIndexedTest( int32_t index, bool_t exec, char* &name, char* par )
  993. {
  994.     if (exec) logln("TestSuite TextBoundary: ");
  995.     switch (index) {
  996.         case 0: name = "TestForwardSentenceSelection"; if (exec) TestForwardSentenceSelection(); break;
  997.         case 1: name = "TestFirstSentenceSelection"; if (exec) TestFirstSentenceSelection(); break;
  998.         case 2: name = "TestLastSentenceSelection"; if (exec) TestLastSentenceSelection(); break;
  999.         case 3: name = "TestBackwardSentenceSelection"; if (exec) TestBackwardSentenceSelection(); break;
  1000.         case 4: name = "TestForwardSentenceIndexSelection"; if (exec) TestForwardSentenceIndexSelection(); break;
  1001.         case 5: name = "TestBackwardSentenceIndexSelection"; if (exec) TestBackwardSentenceIndexSelection(); break;
  1002.         case 6: name = "TestSentenceMultipleSelection"; if (exec) TestSentenceMultipleSelection(); break;
  1003.         case 7: name = "TestForwardWordSelection"; if (exec) TestForwardWordSelection(); break;
  1004.         case 8: name = "TestFirstWordSelection"; if (exec) TestFirstWordSelection(); break;
  1005.         case 9: name = "TestLastWordSelection"; if (exec) TestLastWordSelection(); break;
  1006.         case 10: name = "TestBackwardWordSelection"; if (exec) TestBackwardWordSelection(); break;
  1007.         case 11: name = "TestForwardWordIndexSelection"; if (exec) TestForwardWordIndexSelection(); break;
  1008.         case 12: name = "TestBackwardWordIndexSelection"; if (exec) TestBackwardWordIndexSelection(); break;
  1009.         case 13: name = "TestWordMultipleSelection"; if (exec) TestWordMultipleSelection(); break;
  1010.         case 14: name = "TestForwardLineSelection"; if (exec) TestForwardLineSelection(); break;
  1011.         case 15: name = "TestFirstLineSelection"; if (exec) TestFirstLineSelection(); break;
  1012.         case 16: name = "TestLastLineSelection"; if (exec) TestLastLineSelection(); break;
  1013.         case 17: name = "TestBackwardLineSelection"; if (exec) TestBackwardLineSelection(); break;
  1014.         case 18: name = "TestForwardLineIndexSelection"; if (exec) TestForwardLineIndexSelection(); break;
  1015.         case 19: name = "TestBackwardLineIndexSelection"; if (exec) TestBackwardLineIndexSelection(); break;
  1016.         case 20: name = "TestLineMultipleSelection"; if (exec) TestLineMultipleSelection(); break;
  1017.         case 21: name = "TestForwardCharacterSelection"; if (exec) TestForwardCharacterSelection(); break;
  1018.         case 22: name = "TestFirstCharacterSelection"; if (exec) TestFirstCharacterSelection(); break;
  1019.         case 23: name = "TestLastCharacterSelection"; if (exec) TestLastCharacterSelection(); break;
  1020.         case 24: name = "TestBackwardCharacterSelection"; if (exec) TestBackwardCharacterSelection(); break;
  1021.         case 25: name = "TestForwardCharacterIndexSelection"; if (exec) TestForwardCharacterIndexSelection(); break;
  1022.         case 26: name = "TestBackwardCharacterIndexSelection"; if (exec) TestBackwardCharacterIndexSelection(); break;
  1023.         case 27: name = "TestCharacterMultipleSelection"; if (exec) TestCharacterMultipleSelection(); break;
  1024.         case 28: name = "TestEmptyString"; if (exec) TestEmptyString(); break;
  1025.         case 29: name = "TestGetAvailableLocales"; if (exec) TestGetAvailableLocales(); break;
  1026.         case 30: name = "TestGetDisplayName"; if (exec) TestGetDisplayName(); break;
  1027.         case 31: name = "TestPreceding"; if (exec) TestPreceding(); break;
  1028.         case 32: name = "TestBug4153072"; if (exec) TestBug4153072(); break;
  1029.       /*
  1030.         case 33: 
  1031.             name = "BreakIteratorCAPI"; 
  1032.             if (exec) {
  1033.                 logln("BreakIterator C API test---"); logln("");
  1034.                 IntlTestBreakIteratorFormatU_CAPI test;
  1035.                 callTest( test, par );
  1036.             }
  1037.             break;
  1038.       */
  1039.         default: name = ""; break; //needed to end loop
  1040.     }
  1041. }
  1042.  
  1043. //---------------------------------------------
  1044. // Test implementation routines
  1045. //---------------------------------------------
  1046.  
  1047. void IntlTestTextBoundary::doForwardSelectionTest(BreakIterator& iterator,
  1048.                                                   UnicodeString& testText,
  1049.                                                   Vector* result)
  1050. {
  1051.     int32_t forwardSelectionCounter = 0;
  1052.     int32_t forwardSelectionOffset = 0;
  1053.     CharacterIterator *itSource = 0;
  1054.     CharacterIterator *itTarget = 0;
  1055.  
  1056.     logln("doForwardSelectionTest text of length: "+testText.size());
  1057.  
  1058.     // check to make sure setText() and getText() work right
  1059.     iterator.setText(&testText);
  1060.     itSource = iterator.createText();
  1061.     itTarget = new StringCharacterIterator(testText);
  1062.  
  1063.     if (*itSource != *itTarget)
  1064.         errln("createText() didn't return what we passed to setText!");
  1065.     delete itSource;
  1066.     delete itTarget;
  1067.     UnicodeString expectedResult;
  1068.     UnicodeString selectionResult;
  1069.     
  1070.     int32_t lastOffset = iterator.first();
  1071.     int32_t offset = iterator.next();
  1072.     while(offset != BreakIterator::DONE && forwardSelectionCounter < result->size()) {
  1073.         if (offset != iterator.current())
  1074.             errln((UnicodeString)"current() failed: it returned " + iterator.current() + " and offset was " + offset);
  1075.  
  1076.         expectedResult = result->elementAt(forwardSelectionCounter);
  1077.         forwardSelectionOffset += expectedResult.size();
  1078.         testText.extractBetween(lastOffset, offset, selectionResult);
  1079.         if (offset != forwardSelectionOffset) {
  1080.             errln((UnicodeString)"\n*** Selection #" +
  1081.                   forwardSelectionCounter +
  1082.                   "\nExpected : " +
  1083.                   expectedResult +
  1084.                   " - length : " +
  1085.                   expectedResult.size() +
  1086.                   "\nSelected : " +
  1087.                   selectionResult +
  1088.                   " - length : " +
  1089.                   selectionResult.size());
  1090.         }
  1091.         logln((UnicodeString)"#" + forwardSelectionCounter + " ["+lastOffset+", "+offset+"] : " + selectionResult);
  1092.  
  1093.         forwardSelectionCounter++;
  1094.         lastOffset = offset;
  1095.         offset = iterator.next();
  1096.     }
  1097.     if (forwardSelectionCounter < result->size() - 1)
  1098.         errln((UnicodeString)"\n*** Selection #" + forwardSelectionCounter + " not found at offset "+offset+"!!!");
  1099.     else if (forwardSelectionCounter >= result->size() && offset != BreakIterator::DONE)
  1100.         errln((UnicodeString)"\n*** Selection #" + forwardSelectionCounter + " should not exist at offset "+offset+"!!!");
  1101. }
  1102.  
  1103. void IntlTestTextBoundary::doBackwardSelectionTest(BreakIterator& iterator,
  1104.                                                    UnicodeString& testText,
  1105.                                                    Vector* result)
  1106. {
  1107.     int32_t backwardSelectionCounter = (result->size() - 1);
  1108.     int32_t neededOffset = testText.size();
  1109.     int32_t lastOffset = iterator.last();
  1110.     iterator.setText(&testText);
  1111.     int32_t offset = iterator.previous();
  1112.     
  1113.     UnicodeString expectedResult;
  1114.     UnicodeString selectionResult;
  1115.     
  1116.     while(offset != BreakIterator::DONE)
  1117.     {
  1118.         expectedResult = (UnicodeString)result->elementAt(backwardSelectionCounter);
  1119.         neededOffset -= expectedResult.size();
  1120.         testText.extractBetween(offset, lastOffset, selectionResult);
  1121.         if(offset != neededOffset) {
  1122.             errln(
  1123.                 (UnicodeString)"\n*** Selection #" +
  1124.                 backwardSelectionCounter +
  1125.                 "\nExpected "+neededOffset+"> "  +
  1126.                 expectedResult +
  1127.                 " <" +
  1128.                 "\nSelected "+offset+"> " +
  1129.                 selectionResult +
  1130.                 " <");
  1131.         }
  1132.  
  1133.         logln((UnicodeString)"#" + backwardSelectionCounter + " : " + selectionResult);
  1134.         backwardSelectionCounter--;
  1135.         lastOffset = offset;
  1136.         offset = iterator.previous();
  1137.     }
  1138.     if (backwardSelectionCounter >= 0 && offset != BreakIterator::DONE)
  1139.         errln((UnicodeString)"*** Selection #" + backwardSelectionCounter + " not found!!!");
  1140. }
  1141.  
  1142. void IntlTestTextBoundary::doFirstSelectionTest(BreakIterator& iterator,
  1143.                                                 UnicodeString& testText,
  1144.                                                 Vector* result)
  1145. {
  1146.     bool_t success = TRUE;
  1147.     UnicodeString expectedFirstSelection;
  1148.     UnicodeString tempFirst;
  1149.  
  1150.     iterator.setText(&testText);
  1151.     int32_t selectionStart = iterator.first();
  1152.     int32_t selectionEnd = iterator.next();
  1153.     if(selectionEnd != BreakIterator::DONE) {
  1154.         testText.extractBetween(selectionStart, selectionEnd, tempFirst);
  1155.  
  1156.         expectedFirstSelection = result->elementAt(0);
  1157.         if(tempFirst != expectedFirstSelection) {
  1158.             errln(
  1159.                 (UnicodeString)"\n\n" +
  1160.                 "### Error in TestFindWord::doFirstSelectionTest. First selection not equal to what expected." +
  1161.                 "\nExpexcted : " +
  1162.                 expectedFirstSelection +
  1163.                 " - length : " +
  1164.                 expectedFirstSelection.size() +
  1165.                 "\nSelected : " +
  1166.                 tempFirst +
  1167.                 " - length : " +
  1168.                 tempFirst.size() +
  1169.                 "\n");
  1170.             success = FALSE;
  1171.         }
  1172.     }
  1173.     else if (selectionStart != 0 || testText.size() != 0) {
  1174.         errln((UnicodeString)"\n### Error in TTestFindWord::doFirstSelectionTest. Could not get first selection.\n"+
  1175.             "start = "+selectionStart+"  end = "+selectionEnd);
  1176.         success = FALSE;
  1177.     }
  1178.  
  1179.     if(success) {
  1180.         logln(
  1181.             (UnicodeString)"IntlTestTextBoundary::doFirstSelectionTest \n" +
  1182.             "\nExpexcted first selection: " +
  1183.             expectedFirstSelection +
  1184.             "\nCalculated first selection: " +
  1185.             tempFirst +
  1186.             " is correct\n");
  1187.     }
  1188. }
  1189.  
  1190. void IntlTestTextBoundary::doLastSelectionTest(BreakIterator& iterator,
  1191.                                                UnicodeString& testText,
  1192.                                                Vector* result)
  1193. {
  1194.     bool_t success = TRUE;
  1195.     UnicodeString expectedLastSelection;
  1196.     UnicodeString tempLast;
  1197.  
  1198.     iterator.setText(&testText);
  1199.     int32_t selectionEnd = iterator.last();
  1200.     int32_t selectionStart = iterator.previous();
  1201.     if(selectionStart != BreakIterator::DONE) {
  1202.         testText.extractBetween(selectionStart, selectionEnd, tempLast);
  1203.         expectedLastSelection = result->lastElement();
  1204.         if(tempLast != expectedLastSelection) {
  1205.             errln(
  1206.                 (UnicodeString)"\n\n" +
  1207.                 "### Error in TTestFindWord::doLastSelectionTest. Last selection not equal to what expected." +
  1208.                 "\nExpexcted : " +
  1209.                 expectedLastSelection +
  1210.                 " - length : " +
  1211.                 expectedLastSelection.size() +
  1212.                 "\nSelected : " +
  1213.                  tempLast +
  1214.                  " - length : " +
  1215.                 tempLast.size() +
  1216.                  "\n");
  1217.             success = FALSE;
  1218.         }
  1219.     }
  1220.     else if (selectionEnd != 0 || testText.size() != 0) {
  1221.         errln((UnicodeString)"\n### Error in TTestFindWord::doLastSelectionTest. Could not get last selection."+
  1222.             "["+selectionStart+","+selectionEnd+"]");
  1223.         success = FALSE;
  1224.     }
  1225.  
  1226.     if(success) {
  1227.         logln(
  1228.             (UnicodeString)"TTestFindWord::doLastSelectionTest \n" +
  1229.             "\nExpexcted last selection: " +
  1230.             expectedLastSelection +
  1231.             "\nCalculated last selection: " +
  1232.             tempLast +
  1233.             "\n");
  1234.     }
  1235. }
  1236.  
  1237. /**
  1238.  * @bug 4052418 4068139
  1239.  */
  1240. void IntlTestTextBoundary::doForwardIndexSelectionTest(BreakIterator& iterator,
  1241.                                                        UnicodeString& testText,
  1242.                                                        Vector* result)
  1243. {
  1244.     int32_t arrayCount = result->size();
  1245.     int32_t textLength = testText.size();
  1246.     iterator.setText(&testText);
  1247.     for(UTextOffset offset = 0; offset < textLength; offset++) {
  1248.         int32_t selBegin = iterator.preceding(offset);
  1249.         int32_t selEnd = iterator.following(offset);
  1250.         bool_t isBound = iterator.isBoundary(offset);
  1251.  
  1252.         int32_t entry = 0;
  1253.         int32_t pos = 0;
  1254.         if (selBegin != BreakIterator::DONE) {
  1255.             while (pos < selBegin && entry < arrayCount) {
  1256.                 pos += ((UnicodeString)(result->elementAt(entry))).size();
  1257.                 ++entry;
  1258.             }
  1259.             if (pos != selBegin) {
  1260.                 errln((UnicodeString)"With offset = " + offset + ", got back spurious " + selBegin + " from preceding.");
  1261.                 continue;
  1262.             }
  1263.             else {
  1264.                 pos += ((UnicodeString)(result->elementAt(entry))).size();
  1265.                 ++entry;
  1266.             }
  1267.         }
  1268.         if (isBound) {
  1269.             if (pos != offset) {
  1270.                 errln((UnicodeString)"isBoundary() erroneously returned true with offset = " + offset);
  1271.                 continue;
  1272.             }
  1273.             else {
  1274.                 pos += ((UnicodeString)(result->elementAt(entry))).size();
  1275.                 ++entry;
  1276.             }
  1277.         }
  1278.         if (pos != selEnd) {
  1279.             errln((UnicodeString)"With offset = " + offset + ", got back erroneous " + selEnd + " from following.");
  1280.             continue;
  1281.         }
  1282.     }
  1283. }
  1284.  
  1285. /**
  1286.  * @bug 4052418 4068139
  1287.  */
  1288. void IntlTestTextBoundary::doBackwardIndexSelectionTest(BreakIterator& iterator,
  1289.                                                         UnicodeString& testText,
  1290.                                                         Vector* result)
  1291. {
  1292.     int32_t arrayCount = result->size();
  1293.     int32_t textLength = testText.size();
  1294.     iterator.setText(&testText);
  1295.     for(UTextOffset offset = textLength - 1; offset >= 0; offset--) {
  1296.         int32_t selBegin = iterator.preceding(offset);
  1297.         int32_t selEnd = iterator.following(offset);
  1298.         bool_t isBound = iterator.isBoundary(offset);
  1299.  
  1300.         int32_t entry = 0;
  1301.         int32_t pos = 0;
  1302.         if (selBegin != BreakIterator::DONE) {
  1303.             while (pos < selBegin && entry < arrayCount) {
  1304.                 pos += ((UnicodeString)(result->elementAt(entry))).size();
  1305.                 ++entry;
  1306.             }
  1307.             if (pos != selBegin) {
  1308.                 errln((UnicodeString)"With offset = " + offset + ", got back spurious " + selBegin + " from preceding.");
  1309.                 continue;
  1310.             }
  1311.             else {
  1312.                 pos += ((UnicodeString)(result->elementAt(entry))).size();
  1313.                 ++entry;
  1314.             }
  1315.         }
  1316.         if (isBound) {
  1317.             if (pos != offset) {
  1318.                 errln((UnicodeString)"isBoundary() erroneously returned true with offset = " + offset);
  1319.                 continue;
  1320.             }
  1321.             else {
  1322.                 pos += ((UnicodeString)(result->elementAt(entry))).size();
  1323.                 ++entry;
  1324.             }
  1325.         }
  1326.         if (pos != selEnd) {
  1327.             errln((UnicodeString)"With offset = " + offset + ", got back erroneous " + selEnd + " from following.");
  1328.             continue;
  1329.         }
  1330.     }
  1331. }
  1332.  
  1333. /*
  1334.  * @bug 4153072
  1335.  */
  1336. void IntlTestTextBoundary::TestBug4153072() {
  1337.     BreakIterator *iter = BreakIterator::createWordInstance();
  1338.     UnicodeString str("...Hello, World!...");
  1339.     int32_t begin = 3;
  1340.     int32_t end = str.size() - 3;
  1341.     bool_t gotException = FALSE;
  1342.     bool_t dummy;
  1343.  
  1344.     StringCharacterIterator textIterator(str, begin, end, begin);
  1345.     iter->adoptText(&textIterator);
  1346.     for (int index = -1; index < begin + 1; ++index) {
  1347.         dummy = iter->isBoundary(index);
  1348.         if (index < begin && dummy == TRUE) {
  1349.             errln((UnicodeString)"Didn't handle preceeding correctly with offset = " + index +
  1350.                             " and begin index = " + begin);
  1351.         }
  1352.     }
  1353.     delete iter;
  1354. }
  1355.  
  1356. void IntlTestTextBoundary::doMultipleSelectionTest(BreakIterator& iterator,
  1357.                                                    UnicodeString& testText)
  1358. {
  1359.     iterator.setText(&testText);
  1360.     
  1361.     BreakIterator* testIterator = iterator.clone();
  1362.     int32_t offset = iterator.first();
  1363.     int32_t testOffset;
  1364.     int32_t count = 0;
  1365.  
  1366.     logln("doMultipleSelectionTest text of length: "+testText.size());
  1367.  
  1368.     if (*testIterator != iterator)
  1369.         errln("clone() or operator!= failed: two clones compared unequal");
  1370.     
  1371.     do {
  1372.         testOffset = testIterator->first();
  1373.         testOffset = testIterator->next(count);
  1374.         if (offset != testOffset)
  1375.             errln(UnicodeString("next(n) and next() not returning consistent results: for step ") + count + ", next(n) returned " + testOffset + " and next() had " + offset);
  1376.  
  1377.         if (offset != BreakIterator::DONE) {
  1378.             count++;
  1379.             offset = iterator.next();
  1380.  
  1381.             if (offset != BreakIterator::DONE && *testIterator == iterator)
  1382.                 errln("operator== failed: Two unequal iterators compared equal.");
  1383.         }
  1384.     } while (offset != BreakIterator::DONE);
  1385.  
  1386.     // now do it backwards...
  1387.     offset = iterator.last();
  1388.     count = 0;
  1389.  
  1390.     do {
  1391.         testOffset = testIterator->last();
  1392.         testOffset = testIterator->next(count);
  1393.         if (offset != testOffset)
  1394.             errln(UnicodeString("next(n) and next() not returning consistent results: for step ") + count + ", next(n) returned " + testOffset + " and next() had " + offset);
  1395.  
  1396.         if (offset != BreakIterator::DONE) {
  1397.             count--;
  1398.             offset = iterator.previous();
  1399.         }
  1400.     } while (offset != BreakIterator::DONE);
  1401.     delete testIterator;
  1402. }
  1403.  
  1404. void IntlTestTextBoundary::doBreakInvariantTest(BreakIterator& tb, UnicodeString& testChars)
  1405. {
  1406.     UnicodeString work("aaa");
  1407.     int errorCount = 0;
  1408.  
  1409.     // a break should always occur after CR (unless followed by LF), LF, PS, and LS
  1410.     UnicodeString breaks = CharsToUnicodeString("\r\n\\u2029\\u2028");
  1411.     UTextOffset i, j;
  1412.  
  1413.     for (i = 0; i < breaks.size(); i++) {
  1414.         work[1] = breaks[i];
  1415.         for (j = 0; j < testChars.size(); j++) {
  1416.             work[0] = testChars[j];
  1417.             for (int k = 0; k < testChars.size(); k++) {
  1418.                 UChar c = testChars[k];
  1419.  
  1420.                 // if a cr is followed by lf, ps, ls or etx, don't do the check (that's
  1421.                 // not supposed to work)
  1422.                 if (work[1] == '\r' && (c == '\n' || c == 0x2029
  1423.                         || c == 0x2028 || c == 0x0003))
  1424.                     continue;
  1425.  
  1426.                 work[2] = testChars[k];
  1427.                 tb.setText(&work);
  1428.                 bool_t seen2 = FALSE;
  1429.                 for (int l = tb.first(); l != BreakIterator::DONE; l = tb.next()) {
  1430.                     if (l == 2)
  1431.                         seen2 = TRUE;
  1432.                 }
  1433.                 if (!seen2) {
  1434.                     errln("No break between U+" + UCharToUnicodeString(work[1])
  1435.                                 + " and U+" + UCharToUnicodeString(work[2]));
  1436.                     errorCount++;
  1437.                     if (errorCount >= 75)
  1438.                         return;
  1439.                 }
  1440.             }
  1441.         }
  1442.     }
  1443. }
  1444.  
  1445. void IntlTestTextBoundary::doOtherInvariantTest(BreakIterator& tb, UnicodeString& testChars)
  1446. {
  1447.     UnicodeString work("a\r\na");
  1448.     int32_t errorCount = 0;
  1449.     UTextOffset i, j;
  1450.  
  1451.     // a break should never occur between CR and LF
  1452.     for (i = 0; i < testChars.size(); i++) {
  1453.         work[0] = testChars[i];
  1454.         for (j = 0; j < testChars.size(); j++) {
  1455.             work[3] = testChars[j];
  1456.             tb.setText(&work);
  1457.             for (int32_t k = tb.first(); k != BreakIterator::DONE; k = tb.next())
  1458.                 if (k == 2) {
  1459.                     errln("Break between CR and LF in string U+" + UCharToUnicodeString(work[0]) + 
  1460.                         ", U+d U+a U+" + UCharToUnicodeString(work[3]));
  1461.                     errorCount++;
  1462.                     if (errorCount >= 75)
  1463.                         return;
  1464.                 }
  1465.         }
  1466.     }
  1467.  
  1468.     // a break should never occur before a non-spacing mark, unless the preceding
  1469.     // character is CR, LF, PS, or LS
  1470.     work.remove();
  1471.     work += "aaaa";
  1472.     for (i = 0; i < testChars.size(); i++) {
  1473.         UChar c = testChars[i];
  1474.         if (c == '\n' || c == '\r' || c == 0x2029 || c == 0x2028 || c == 0x0003)
  1475.             continue;
  1476.         work[1] = c;
  1477.         for (j = 0; j < testChars.size(); j++) {
  1478.             c = testChars[j];
  1479.             if ((Unicode::getType(c) != Unicode::NON_SPACING_MARK) && 
  1480.                 (Unicode::getType(c) != Unicode::ENCLOSING_MARK))
  1481.                 continue;
  1482.             work[2] = c;
  1483.             tb.setText(&work);
  1484.             for (int k = tb.first(); k != BreakIterator::DONE; k = tb.next())
  1485.                 if (k == 2) {
  1486.                     errln("Break between U+" + UCharToUnicodeString(work[1])
  1487.                             + " and U+" + UCharToUnicodeString(work[2]));
  1488.                     errorCount++;
  1489.                     if (errorCount >= 75)
  1490.                         return;
  1491.                 }
  1492.         }
  1493.     }
  1494. }
  1495.  
  1496. void IntlTestTextBoundary::sample(BreakIterator& tb,
  1497.                                   UnicodeString& text,
  1498.                                   UnicodeString& title)
  1499. {
  1500.     UnicodeString   substring;
  1501.     bool_t verboseWas = verbose;
  1502.     verbose = TRUE;
  1503.     logln("-------------------------"+title+" length = "+text.size());
  1504.     tb.setText(&text);
  1505.     int32_t start = tb.first();
  1506.     int32_t end;
  1507.     for (end = tb.next(); end != BreakIterator::DONE; end = tb.next()) {
  1508.         text.extractBetween(start, end, substring);
  1509.         logln(UnicodeString("[")+start+","+end+"] \""+substring+"\"");
  1510.         start = end;
  1511.     }
  1512.     verbose = verboseWas;
  1513. }
  1514.  
  1515. void IntlTestTextBoundary::TestPreceding()
  1516. {
  1517.     UnicodeString words3("aaa bbb ccc");
  1518.     BreakIterator* e = BreakIterator::createWordInstance();
  1519.     e->setText( &words3 );
  1520.     e->first();
  1521.     UTextOffset p1 = e->next();
  1522.     UTextOffset p2 = e->next();
  1523.     UTextOffset p3 = e->next();
  1524.     UTextOffset p4 = e->next();
  1525.     UTextOffset f = e->following( p2+1 );
  1526.     UTextOffset p = e->preceding( p2+1 );
  1527.     if (f!=p3) errln("IntlTestTextBoundary::TestPreceding: f!=p3");
  1528.     if (p!=p2) errln("IntlTestTextBoundary::TestPreceding: p!=p2");
  1529.     if (!e->isBoundary(p2) || e->isBoundary(p2+1) || !e->isBoundary(p3))
  1530.     {
  1531.         errln("IntlTestTextBoundary::TestPreceding: isBoundary err");
  1532.     }
  1533.     delete e;
  1534. }
  1535.  
  1536.