home *** CD-ROM | disk | FTP | other *** search
- package com.extensibility.util.regexpr;
-
- import java.util.Enumeration;
- import java.util.Hashtable;
- import java.util.Vector;
-
- public class CharPatternTerm implements PatternTerm {
- public static final int TYPE_SEQ = 0;
- public static final int TYPE_CHOICE = 1;
- public static final int TYPE_LEAF_POSITIVE = 2;
- public static final int TYPE_LEAF_NEGATIVE = 3;
- public static final int TYPE_LEAF_WILDCARD = 4;
- protected int minOccurrence = 1;
- protected int maxOccurrence = 1;
- protected int type;
- protected int[] categories;
- protected Vector subTermVector = new Vector();
- protected Vector rangeVector = new Vector();
- protected CharPatternTerm notTerm;
- protected static Hashtable categoryMap = null;
- protected static Hashtable blockMap;
-
- public CharPatternTerm(int var1) {
- this.type = var1;
- }
-
- public CharPatternTerm(char var1) {
- this.type = 2;
- this.rangeVector.addElement(new CharRange(var1));
- }
-
- public void addSubTerm(CharPatternTerm var1) {
- this.subTermVector.addElement(var1);
- }
-
- public void addChar(char var1) {
- this.rangeVector.addElement(new CharRange(var1));
- }
-
- public void addChar(char var1, char var2) {
- this.rangeVector.addElement(new CharRange(var1, var2));
- }
-
- public void combineRanges() {
- CharRange var1 = (CharRange)this.rangeVector.elementAt(this.rangeVector.size() - 2);
- CharRange var2 = (CharRange)this.rangeVector.elementAt(this.rangeVector.size() - 1);
- var1.setLast(var2.getLast());
- this.rangeVector.removeElementAt(this.rangeVector.size() - 1);
- }
-
- public void addNegativeTerm(CharPatternTerm var1) {
- this.notTerm = var1;
- }
-
- public void setCategory(String var1) {
- if (categoryMap == null) {
- categoryMap = new Hashtable();
- categoryMap.put("L", new int[]{1, 2, 3, 4, 5});
- categoryMap.put("Lu", new int[]{1});
- categoryMap.put("Ll", new int[]{2});
- categoryMap.put("Lt", new int[]{3});
- categoryMap.put("Lm", new int[]{4});
- categoryMap.put("Lo", new int[]{5});
- categoryMap.put("M", new int[]{6, 8, 7});
- categoryMap.put("Mn", new int[]{6});
- categoryMap.put("Mc", new int[]{8});
- categoryMap.put("Me", new int[]{7});
- categoryMap.put("N", new int[]{9, 10, 11});
- categoryMap.put("Nd", new int[]{9});
- categoryMap.put("Nl", new int[]{10});
- categoryMap.put("No", new int[]{11});
- categoryMap.put("P", new int[]{23, 20, 21, 22, 24});
- categoryMap.put("Pc", new int[]{23});
- categoryMap.put("Pd", new int[]{20});
- categoryMap.put("Ps", new int[]{21});
- categoryMap.put("Pe", new int[]{22});
- categoryMap.put("Pi", new int[]{24});
- categoryMap.put("Pf", new int[]{24});
- categoryMap.put("Po", new int[]{24});
- categoryMap.put("Z", new int[]{12, 13, 14});
- categoryMap.put("Zs", new int[]{12});
- categoryMap.put("Zl", new int[]{13});
- categoryMap.put("Zp", new int[]{14});
- categoryMap.put("S", new int[]{25, 26, 27, 28});
- categoryMap.put("Sm", new int[]{25});
- categoryMap.put("Sc", new int[]{26});
- categoryMap.put("Sk", new int[]{27});
- categoryMap.put("So", new int[]{28});
- categoryMap.put("C", new int[]{15, 16, 19, 18, 0});
- categoryMap.put("Cc", new int[]{15});
- categoryMap.put("Cf", new int[]{16});
- categoryMap.put("Cs", new int[]{19});
- categoryMap.put("Co", new int[]{18});
- categoryMap.put("Cn", new int[]{0});
- }
-
- this.categories = (int[])categoryMap.get(var1);
- if (this.categories == null) {
- throw new RuntimeException(String.valueOf(String.valueOf("invalid Unicode category \"").concat(String.valueOf(var1))).concat(String.valueOf("\"")));
- }
- }
-
- public void setBlock(String var1) {
- if (blockMap == null) {
- blockMap = new Hashtable();
- blockMap.put("BasicLatin", new char[]{'\u0000', '\u007f'});
- blockMap.put("Latin-1Supplement", new char[]{'\u0080', 'ÿ'});
- blockMap.put("LatinExtended-A", new char[]{'Ā', 'ſ'});
- blockMap.put("LatinExtended-B", new char[]{'ΔÄ', '…è'});
- blockMap.put("IPAExtensions", new char[]{'…ê', ' Ø'});
- blockMap.put("SpacingModifierLetters", new char[]{' ∞', 'Àø'});
- blockMap.put("CombiningDiacriticalMarks", new char[]{'ÃÄ', 'ÕØ'});
- blockMap.put("Greek", new char[]{'Ͱ', 'Ͽ'});
- blockMap.put("Cyrillic", new char[]{'–Ä', '”ø'});
- blockMap.put("Armenian", new char[]{'\u0530', '֏'});
- blockMap.put("Hebrew", new char[]{'\u0590', '\u05ff'});
- blockMap.put("Arabic", new char[]{'\u0600', '€ø'});
- blockMap.put("Syriac ", new char[]{'܀', 'ݏ'});
- blockMap.put("Thaana", new char[]{'ހ', '\u07bf'});
- blockMap.put("Devanagari", new char[]{'ऀ', 'ॿ'});
- blockMap.put("Bengali", new char[]{'ঀ', '\u09ff'});
- blockMap.put("Gurmukhi", new char[]{'\u0a00', '\u0a7f'});
- blockMap.put("Gujarati", new char[]{'\u0a80', '‡´ø'});
- blockMap.put("Oriya", new char[]{'\u0b00', '\u0b7f'});
- blockMap.put("Tamil", new char[]{'\u0b80', '\u0bff'});
- blockMap.put("Telugu", new char[]{'ఀ', '౿'});
- blockMap.put("Kannada", new char[]{'ಀ', '\u0cff'});
- blockMap.put("Malayalam", new char[]{'ഀ', 'ൿ'});
- blockMap.put("Sinhala", new char[]{'\u0d80', '\u0dff'});
- blockMap.put("Thai", new char[]{'\u0e00', '\u0e7f'});
- blockMap.put("Lao", new char[]{'\u0e80', '\u0eff'});
- blockMap.put("Tibetan", new char[]{'ༀ', '\u0fff'});
- blockMap.put("Myanmar ", new char[]{'·ÄÄ', '·Çü'});
- blockMap.put("Georgian", new char[]{'Ⴀ', 'ჿ'});
- blockMap.put("Hangul Jamo", new char[]{'·ÑÄ', '·áø'});
- blockMap.put("Ethiopic", new char[]{'ሀ', '\u137f'});
- blockMap.put("Cherokee", new char[]{'Ꭰ', '\u13ff'});
- blockMap.put("UnifiedCanadianAboriginalSyllabics", new char[]{'᐀', 'ᙿ'});
- blockMap.put("Ogham", new char[]{'·öÄ', '\u169f'});
- blockMap.put("Runic", new char[]{'·ö†', '\u16ff'});
- blockMap.put("Khmer", new char[]{'·ûÄ', '\u17ff'});
- blockMap.put("Mongolian", new char[]{'᠀', '\u18af'});
- blockMap.put("LatinExtendedAdditional", new char[]{'Ḁ', 'ỿ'});
- blockMap.put("GreekExtended", new char[]{'ἀ', '\u1fff'});
- blockMap.put("GeneralPunctuation", new char[]{'‚ÄÄ', '\u206f'});
- blockMap.put("SuperscriptsandSubscripts", new char[]{'⁰', '\u209f'});
- blockMap.put("CurrencySymbols", new char[]{'₠', '\u20cf'});
- blockMap.put("CombiningMarksforSymbols", new char[]{'‚Éê', '\u20ff'});
- blockMap.put("LetterlikeSymbols", new char[]{'‚ÑÄ', '‚Öè'});
- blockMap.put("NumberForms", new char[]{'‚Öê', '\u218f'});
- blockMap.put("Arrows", new char[]{'‚Üê', '‚áø'});
- blockMap.put("MathematicalOperators", new char[]{'∀', '⋿'});
- blockMap.put("MiscellaneousTechnical", new char[]{'⌀', '⏿'});
- blockMap.put("ControlPictures", new char[]{'␀', '\u243f'});
- blockMap.put("OpticalCharacterRecognition", new char[]{'⑀', '\u245f'});
- blockMap.put("EnclosedAlphanumerics", new char[]{'①', '⓿'});
- blockMap.put("BoxDrawing", new char[]{'─', '╿'});
- blockMap.put("BlockElements", new char[]{'▀', '▟'});
- blockMap.put("GeometricShapes", new char[]{'■', '◿'});
- blockMap.put("MiscellaneousSymbols", new char[]{'‚òÄ', '‚õø'});
- blockMap.put("Dingbats", new char[]{'‚úÄ', '‚ûø'});
- blockMap.put("BraillePatterns", new char[]{'⠀', '⣿'});
- blockMap.put("CJKRadicalsSupplement", new char[]{'⺀', '\u2eff'});
- blockMap.put("KangxiRadicals", new char[]{'⼀', '\u2fdf'});
- blockMap.put("IdeographicDescriptionCharacters", new char[]{'‚ø∞', '\u2fff'});
- blockMap.put("CJKSymbolsandPunctuation", new char[]{'„ÄÄ', '„Äø'});
- blockMap.put("Hiragana", new char[]{'\u3040', '„Çü'});
- blockMap.put("Katakana", new char[]{'゠', 'ヿ'});
- blockMap.put("Bopomofo", new char[]{'\u3100', '„ÑØ'});
- blockMap.put("HangulCompatibilityJamo", new char[]{'\u3130', '\u318f'});
- blockMap.put("Kanbun", new char[]{'„Üê', '„Üü'});
- blockMap.put("BopomofoExtended", new char[]{'ㆠ', 'ㆿ'});
- blockMap.put("EnclosedCJKLettersandMonths", new char[]{'㈀', '㋿'});
- blockMap.put("CJKCompatibility", new char[]{'㌀', '㏿'});
- blockMap.put("CJKUnifiedIdeographsExtensionA", new char[]{'㐀', '䶵'});
- blockMap.put("CJKUnifiedIdeographs", new char[]{'一', '鿿'});
- blockMap.put("YiSyllables", new char[]{'ÍÄÄ', '\ua48f'});
- blockMap.put("YiRadicals", new char[]{'Ííê', '\ua4cf'});
- blockMap.put("HangulSyllables", new char[]{'가', '힣'});
- blockMap.put("HighSurrogates", new char[]{'\ud800', '\udb7f'});
- blockMap.put("HighPrivateUseSurrogates", new char[]{'\udb80', '\udbff'});
- blockMap.put("LowSurrogates", new char[]{'\udc00', '\udfff'});
- blockMap.put("PrivateUse", new char[]{'\ue000', '\uf8ff'});
- blockMap.put("CJKCompatibilityIdeographs", new char[]{'Ô§Ä', '\ufaff'});
- blockMap.put("AlphabeticPresentationForms", new char[]{'ff', 'ﭏ'});
- blockMap.put("ArabicPresentationForms-A", new char[]{'ﭐ', '﷿'});
- blockMap.put("CombiningHalfMarks", new char[]{'︠', '︯'});
- blockMap.put("CJKCompatibilityForms", new char[]{'Ô∏∞', 'Ôπè'});
- blockMap.put("SmallFormVariants", new char[]{'Ôπê', '\ufe6f'});
- blockMap.put("ArabicPresentationForms-B", new char[]{'Ôπ∞', '\ufefe'});
- blockMap.put("Specials", new char[]{'\ufeff', 'ÔøΩ'});
- blockMap.put("HalfwidthandFullwidthForms", new char[]{'\uff00', '\uffef'});
- }
-
- char[] var2 = (char[])blockMap.get(var1);
- if (var2 == null) {
- throw new RuntimeException(String.valueOf(String.valueOf("invalid Unicode block name \"").concat(String.valueOf(var1))).concat(String.valueOf("\"")));
- } else {
- this.addChar(var2[0], var2[1]);
- }
- }
-
- public void setOccurrence(int var1, int var2) {
- this.minOccurrence = var1;
- this.maxOccurrence = var2;
- }
-
- public boolean isSeq() {
- return this.type == 0;
- }
-
- public boolean isChoice() {
- return this.type == 1;
- }
-
- public Enumeration getPatternTerms() {
- return this.subTermVector.elements();
- }
-
- public boolean matches(String var1) {
- if (this.type == 4) {
- return true;
- } else {
- boolean var2 = false;
- if (this.categories != null) {
- if (var1.length() > 0) {
- for(int var3 = 0; var3 < this.categories.length && !var2; ++var3) {
- var2 = this.categories[var3] == Character.getType(var1.charAt(0));
- }
- }
- } else {
- for(Enumeration var5 = this.rangeVector.elements(); var5.hasMoreElements() && !var2; var2 = ((CharRange)var5.nextElement()).matches(var1)) {
- }
-
- for(Enumeration var4 = this.subTermVector.elements(); var4.hasMoreElements() && !var2; var2 = ((PatternTerm)var4.nextElement()).matches(var1)) {
- }
- }
-
- if (this.type == 3) {
- var2 = !var2;
- }
-
- if (var2 && this.notTerm != null && this.notTerm.matches(var1)) {
- var2 = false;
- }
-
- return var2;
- }
- }
-
- public boolean intersects(PatternTerm var1) {
- CharPatternTerm var2 = (CharPatternTerm)var1;
- if (this.isLeaf() && var2.isLeaf()) {
- if (this.type == 4 || var2.type == 4) {
- return true;
- }
-
- if (this.type == 3) {
- if (var2.type == 2) {
- return var2.intersects(this);
- }
-
- return false;
- }
-
- if (this.categories != null) {
- if (var2.categories == null) {
- return var2.intersects(this);
- }
-
- for(int var6 = 0; var6 < this.categories.length; ++var6) {
- for(int var8 = 0; var8 < var2.categories.length; ++var8) {
- if (this.categories[var6] == var2.categories[var8]) {
- return true;
- }
- }
- }
-
- return false;
- }
-
- Enumeration var3 = this.rangeVector.elements();
-
- while(var3.hasMoreElements()) {
- CharRange var4 = (CharRange)var3.nextElement();
-
- for(char var5 = var4.getFirst(); var5 <= var4.getLast(); ++var5) {
- if ((this.notTerm == null || !this.notTerm.matches(String.valueOf(var5))) && var2.matches(String.valueOf(var5))) {
- return true;
- }
- }
- }
-
- Enumeration var7 = this.subTermVector.elements();
-
- while(var7.hasMoreElements()) {
- if (((PatternTerm)var7.nextElement()).intersects(var2)) {
- return true;
- }
- }
- }
-
- return false;
- }
-
- protected boolean isLeaf() {
- return this.type == 4 || this.type == 2 || this.type == 3;
- }
-
- public int getMinOccurrence() {
- return this.minOccurrence;
- }
-
- public int getMaxOccurrence() {
- return this.maxOccurrence;
- }
-
- public String toString() {
- String var1 = "";
- if (this.isChoice()) {
- var1 = String.valueOf(var1).concat(String.valueOf("("));
-
- for(int var2 = 0; var2 < this.subTermVector.size(); ++var2) {
- if (var2 > 0) {
- var1 = String.valueOf(var1).concat(String.valueOf(" | "));
- }
-
- var1 = String.valueOf(var1).concat(String.valueOf(this.subTermVector.elementAt(var2).toString()));
- }
-
- var1 = String.valueOf(var1).concat(String.valueOf(")"));
- } else if (this.isSeq()) {
- var1 = String.valueOf(var1).concat(String.valueOf("("));
-
- for(int var10 = 0; var10 < this.subTermVector.size(); ++var10) {
- if (var10 > 0) {
- var1 = String.valueOf(var1).concat(String.valueOf(", "));
- }
-
- var1 = String.valueOf(var1).concat(String.valueOf(this.subTermVector.elementAt(var10).toString()));
- }
-
- var1 = String.valueOf(var1).concat(String.valueOf(")"));
- } else if (this.type == 4) {
- var1 = "WILDCARD";
- } else if (this.type == 2 && this.subTermVector.size() == 0 && this.rangeVector.size() == 1 && this.rangeVector.elementAt(0).toString().length() == 1) {
- var1 = this.rangeVector.elementAt(0).toString();
- } else {
- var1 = String.valueOf(var1).concat(String.valueOf("["));
- if (this.type == 3) {
- var1 = String.valueOf(var1).concat(String.valueOf("^"));
- }
-
- if (this.categories != null) {
- var1 = String.valueOf(var1).concat(String.valueOf("UnicodeProperty("));
-
- for(int var11 = 0; var11 < this.categories.length; ++var11) {
- var1 = String.valueOf(var1).concat(String.valueOf(String.valueOf(var11 == 0 ? "" : ",").concat(String.valueOf(this.categories[var11]))));
- }
-
- var1 = String.valueOf(var1).concat(String.valueOf(")"));
- }
-
- for(int var12 = 0; var12 < this.rangeVector.size(); ++var12) {
- var1 = String.valueOf(var1).concat(String.valueOf(this.rangeVector.elementAt(var12).toString()));
- }
-
- for(int var3 = 0; var3 < this.subTermVector.size(); ++var3) {
- var1 = String.valueOf(var1).concat(String.valueOf(this.subTermVector.elementAt(var3).toString()));
- }
-
- if (this.notTerm != null) {
- var1 = String.valueOf(var1).concat(String.valueOf(String.valueOf("-").concat(String.valueOf(this.notTerm.toString()))));
- }
-
- var1 = String.valueOf(var1).concat(String.valueOf("]"));
- }
-
- if (this.minOccurrence != 1 || this.maxOccurrence != 1) {
- if (this.minOccurrence == 0 && this.maxOccurrence == 1) {
- var1 = String.valueOf(var1).concat(String.valueOf("?"));
- } else if (this.minOccurrence == 0 && this.maxOccurrence == Integer.MAX_VALUE) {
- var1 = String.valueOf(var1).concat(String.valueOf("*"));
- } else if (this.minOccurrence == 1 && this.maxOccurrence == Integer.MAX_VALUE) {
- var1 = String.valueOf(var1).concat(String.valueOf("+"));
- } else {
- var1 = String.valueOf(var1).concat(String.valueOf(String.valueOf(String.valueOf("{").concat(String.valueOf(this.minOccurrence))).concat(String.valueOf(","))));
- if (this.maxOccurrence != Integer.MAX_VALUE) {
- var1 = String.valueOf(var1).concat(String.valueOf(this.maxOccurrence));
- }
-
- var1 = String.valueOf(var1).concat(String.valueOf("}"));
- }
- }
-
- return var1;
- }
- }
-