Programming Languages Suite

home *** CD-ROM | disk | FTP | other *** search

/ Programming Languages Suite / ProgLangD.iso / VCAFE.3.0A / Main.bin / CollationElementIterator.java < prev next >

Wrap

Text File | 1998-09-22 | 12KB | 341 lines

/* * @(#)CollationElementIterator.java 1.15 97/12/05 * * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved * (C) Copyright IBM Corp. 1996, 1997 - All Rights Reserved * * Portions copyright (c) 1996 Sun Microsystems, Inc. All Rights Reserved. * * The original version of this source code and documentation is copyrighted * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These * materials are provided under terms of a License Agreement between Taligent * and Sun. This technology is protected by multiple US and International * patents. This notice and attribution to Taligent may not be removed. * Taligent is a registered trademark of Taligent, Inc. * * Permission to use, copy, modify, and distribute this software * and its documentation for NON-COMMERCIAL purposes and without * fee is hereby granted provided that this copyright notice * appears in all copies. Please refer to the file "copyright.html" * for further important copyright and licensing information. * * SUN MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED * TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE, OR NON-INFRINGEMENT. SUN SHALL NOT BE LIABLE FOR * ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR * DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES. * */ package java.text; import java.lang.Character; import java.util.Vector; /** * The <code>CollationElementIterator</code> class is used as an iterator * to walk through each character of an international string. Use the iterator * to return the ordering priority of the positioned character. The ordering * priority of a character, which we refer to as a key, defines how a character * is collated in the given collation object. * * * For example, consider the following in Spanish: * <blockquote> * <pre> * "ca" -> the first key is key('c') and second key is key('a'). * "cha" -> the first key is key('ch') and second key is key('a'). * </pre> * </blockquote> * And in German, * <blockquote> * <pre> * "\u00e4b"-> the first key is key('a'), the second key is key('e'), and * the third key is key('b'). * </pre> * </blockquote> * The key of a character is an integer composed of primary order(short), * secondary order(byte), and tertiary order(byte). Java strictly defines * the size and signedness of its primitive data types. Therefore, the static * functions <code>primaryOrder</code>, <code>secondaryOrder</code>, and * <code>tertiaryOrder</code> return <code>int</code>, <code>short</code>, * and <code>short</code> respectively to ensure the correctness of the key * value. * * * Example of the iterator usage, * <blockquote> * <pre> * // get the first key of the string * String str = "This is a test"; * CollationElementIterator c = * new CollationElementIterator(str, 0, str.length(), * Collator.getInstance()); * int primaryOrder = CollationElementIterator.primaryOrder(c->next()); * </pre> * </blockquote> * * * <code>CollationElementIterator.next</code> returns the collation order * of the next character. A collation order consists of primary order, * secondary order and tertiary order. The data type of the collation * order is int. The first 16 bits of a collation order * is its primary order; the next 8 bits is the secondary order and the * last 8 bits is the tertiary order. * * @see Collator * @see RuleBasedCollator * @version 1.15 12/05/97 * @author Helena Shih */ public final class CollationElementIterator { /** * Null order which indicates the end of string is reached by the * cursor. */ public final static int NULLORDER = 0xffffffff; /** * CollationElementIterator constructor. This takes the source string and * the collation object. The cursor will walk thru the source string based * on the predefined collation rules. If the source string is empty, * NULLORDER will be returned on the calls to next(). * @param sourceText the source string. * @param order the collation object. */ CollationElementIterator(String sourceText, RuleBasedCollator order) { ordering = order; if ( sourceText.length() != 0 ) { text = new DecompositionIterator(sourceText, 0, sourceText.length(), order.getDecomposition()); } } /** * Resets the cursor to the beginning of the string. */ public void reset() { if (text != null) text.reset(); buffer = null; expIndex = 0; swapOrder = 0; text.setDecomposition(ordering.getDecomposition()); } /** * Get the ordering priority of the next character in the string. * @return the next character's ordering. Returns NULLORDER if * the end of string is reached. */ public int next() { if (text == null) return NULLORDER; if (text.getDecomposition() != ordering.getDecomposition()) text.setDecomposition(ordering.getDecomposition()); if (buffer != null) { if (expIndex < buffer.length) { return strengthOrder(buffer[expIndex++]); } else { buffer = null; expIndex = 0; } } else if (swapOrder != 0) { int order = swapOrder << 16; swapOrder = 0; return order; } char ch = text.next(); if (ch == DecompositionIterator.NULLORDER) { return NULLORDER; } int value = ordering.getUnicodeOrder(ch); if (value == RuleBasedCollator.UNMAPPED) { swapOrder = ch; return UNMAPPEDCHARVALUE; } else if (value < RuleBasedCollator.CHARINDEX) { return strengthOrder(value); } // contract characters else if (value >= RuleBasedCollator.CONTRACTCHARINDEX) { return strengthOrder(nextContractChar(ch)); } else if (value >= RuleBasedCollator.EXPANDCHARINDEX) { buffer = ordering.getExpandValueList(ch); expIndex = 0; return strengthOrder(buffer[expIndex++]); } return NULLORDER; } /** * Get the primary order of a collation order. * @param order the collation order * @return the primary order of a collation order. */ public final static int primaryOrder(int order) { order &= RuleBasedCollator.PRIMARYORDERMASK; return (order >>> RuleBasedCollator.PRIMARYORDERSHIFT); } /** * Get the secondary order of a collation order. * @param order the collation order * @return the secondary order of a collation order. */ public final static short secondaryOrder(int order) { order = order & RuleBasedCollator.SECONDARYORDERMASK; return ((short)(order >> RuleBasedCollator.SECONDARYORDERSHIFT)); } /** * Get the tertiary order of a collation order. * @param order the collation order * @return the tertiary order of a collation order. */ public final static short tertiaryOrder(int order) { return ((short)(order &= RuleBasedCollator.TERTIARYORDERMASK)); } // ============================================================ // package private (These need to be made public for searching) // ============================================================ /** * Check if a comparison order is ignorable. * @return true if a character is ignorable, false otherwise. */ final static boolean isIgnorable(int order) { return ((primaryOrder(order) == 0) ? true : false); } /** * Get the comparison order in the desired strength. Ignore the other * differences. * @param order The order value */ final int strengthOrder(int order) { int s = ordering.getStrength(); if (s == Collator.PRIMARY) { order &= RuleBasedCollator.PRIMARYDIFFERENCEONLY; } else if (s == Collator.SECONDARY) { order &= RuleBasedCollator.SECONDARYDIFFERENCEONLY; } return order; } /** * Set the current offset of the character in the processed source string. * @param newOffset The new offset of the accessed character */ final void setOffset(int newOffset) { if (text != null) text.setOffset(newOffset); } /** * Get the current offset of the character in the processed source string. * @return The offset of the accessed character */ final int getOffset() { if (text != null) return text.getOffset(); return 0; } /** * Set the iterator string. * @param source the new string. */ void setText(String source) { buffer = null; swapOrder = 0; if (text == null) { text = new DecompositionIterator(source, ordering.getDecomposition()); } else { text.setDecomposition(ordering.getDecomposition()); text.setText(source); } } //============================================================ // privates //============================================================ private int getEntry(Vector list, String name) { for (int i = 0; i < list.size(); i++) { EntryPair pair = (EntryPair)list.elementAt(i); if (pair.entryName.equals(name)) { return i; } } return RuleBasedCollator.UNMAPPED; } /** * Get the ordering priority of the next contracting character in the * string. * @param ch the starting character of a contracting character token * @return the next contracting character's ordering. Returns NULLORDER * if the end of string is reached. */ private int nextContractChar(char ch) { EntryPair pair = null; Vector list = ordering.getContractValues(ch); int intValue = 0; key.setLength(0); key.append(ch); int n = 0; if ((n = getEntry(list, key.toString())) != RuleBasedCollator.UNMAPPED) { pair = (EntryPair)list.elementAt(n); intValue = pair.value; } while((ch = text.next()) != DecompositionIterator.NULLORDER) { key.append(ch); if ((n = getEntry(list, key.toString())) == RuleBasedCollator.UNMAPPED) { ch = text.previous(); break; } pair = (EntryPair)list.elementAt(n); intValue = pair.value; } // if contracting char is also an expanding char if ((intValue - RuleBasedCollator.EXPANDCHARINDEX) >= 0) { buffer = ordering.getExpandValueList (intValue - RuleBasedCollator.EXPANDCHARINDEX); expIndex = 0; intValue = buffer[expIndex++]; } return intValue; } final static int UNMAPPEDCHARVALUE = 0x7FFF0000; private DecompositionIterator text = null; private int[] buffer = null; private int expIndex = 0; private StringBuffer key = new StringBuffer(5); private int swapOrder = 0; private RuleBasedCollator ordering; }