The BreakIterator C API defines methods for finding the location of boundaries in text
The BreakIterator C API defines methods for finding the location of boundaries in text. A pointer to a UBreakIterator maintains a current position and scans over text, returning the index of characters where boundaries occur.

Line boundary analysis determines where a text string can be broken when line-wrapping. The mechanism correctly handles punctuation and hyphenated words.

Sentence boundary analysis allows selection with correct interpretation of periods within numbers and abbreviations, and trailing punctuation marks such as quotation marks and parentheses.

Word boundary analysis is used by search and replace functions, as well as within text editing applications that allow the user to select words with a double click. Word selection provides correct interpretation of punctuation marks within and following words. Characters that are not part of a word, such as symbols or punctuation marks, have word-breaks on both sides.

Character boundary analysis allows users to interact with characters as they expect to, for example, when moving the cursor through a text string. Character boundary analysis provides correct navigation through character strings, regardless of how the character is stored. For example, an accented character might be stored as a base character and a diacritical mark. What users consider to be a character can differ between languages.

This is the interface for all text boundaries.

Examples:

Helper function to output text

.   void printTextRange(UChar* str, UTextOffset start, UTextOffset end )
.   {
.        UChar* result;
.        UChar* temp;
.        char* res;
.        temp=(UChar*)malloc(sizeof(UChar) * ((u_strlen(str)-start)+1));
.        result=(UChar*)malloc(sizeof(UChar) * ((end-start)+1));
.        u_strcpy(temp, &str[start]);
.        u_strncpy(result, temp, end-start);
.        res=(char*)malloc(sizeof(char) * (u_strlen(result)+1));
.        u_austrcpy(res, result);
.        printf("%s\n", res); 
.   }
Print each element in order:
.   void printEachForward( UBreakIterator* boundary, UChar* str)
.   {
.      UTextOffset end;
.      UTextOffset start = ubrk_first(boundary);
.      for (end = ubrk_next(boundary); end != UBRK_DONE; start = end, end = ubrk_next(boundary))
.        {
.            printTextRange(str, start, end );
.        }
.   }
Print each element in reverse order:
.   void printEachBackward( UBreakIterator* boundary, UChar* str)
.   {
.      UTextOffset start;
.      UTextOffset end = ubrk_last(boundary);
.      for (start = ubrk_previous(boundary); start != UBRK_DONE;  end = start, start =ubrk_previous(boundary))
.        {
.            printTextRange( str, start, end );
.        }
.   }
Print first element
.   void printFirst(UBreakIterator* boundary, UChar* str)
.   {
.       UTextOffset end;
.       UTextOffset start = ubrk_first(boundary);
.       end = ubrk_next(boundary);
.       printTextRange( str, start, end );
.   }
Print last element
.   void printLast(UBreakIterator* boundary, UChar* str)
.   {
.       UTextOffset start;
.       UTextOffset end = ubrk_last(boundary);
.       start = ubrk_previous(boundary);
.       printTextRange(str, start, end );
.   }
Print the element at a specified position
.   void printAt(UBreakIterator* boundary, UTextOffset pos , UChar* str)
.   {
.       UTextOffset start;
.       UTextOffset end = ubrk_following(boundary, pos);
.       start = ubrk_previous(boundary);
.       printTextRange(str, start, end );
.   }
Creating and using text boundaries
.      void BreakIterator_Example( void )
.      {
.          UBreakIterator* boundary;
.          UErrorCode status = U_ZERO_ERROR;
.          UChar *stringToExamine;
.          stringToExamine=(UChar*)malloc(sizeof(UChar) * (strlen("Aaa bbb ccc. Ddd eee fff.")+1) );
.          u_uastrcpy(stringToExamine, "Aaa bbb ccc. Ddd eee fff.");
.          printf("Examining: Aaa bbb ccc. Ddd eee fff.\n");
.
.          //print each sentence in forward and reverse order
.          boundary = ubrk_open(UBRK_SENTENCE, "en_us", stringToExamine, u_strlen(stringToExamine), &status);
.          printf("----- forward: -----------\n"); 
.          printEachForward(boundary, stringToExamine);
.          printf("----- backward: ----------\n");
.          printEachBackward(boundary, stringToExamine);
.          ubrk_close(boundary);
.
.          //print each word in order
.          boundary = ubrk_open(UBRK_WORD, "en_us", stringToExamine, u_strlen(stringToExamine), &status);
.          printf("----- forward: -----------\n"); 
.          printEachForward(boundary, stringToExamine);
.          printf("----- backward: ----------\n");
.          printEachBackward(boundary, stringToExamine);
.          //print first element
.          printf("----- first: -------------\n");
.          printFirst(boundary, stringToExamine);
.          //print last element
.          printf("----- last: --------------\n");
.          printLast(boundary, stringToExamine);
.          //print word at charpos 10
.          printf("----- at pos 10: ---------\n");
.          printAt(boundary, 10 , stringToExamine);
.
.          ubrk_close(boundary);
.      }

alphabetic index hierarchy of classes


this page has been generated automatically by doc++

(c)opyright by Malte Zöckler, Roland Wunderling
contact: doc++@zib.de