home *** CD-ROM | disk | FTP | other *** search
- package com.extensibility.util.regexpr;
-
- import com.extensibility.util.Debug;
-
- public class RegExpr {
- static final char NEWLINE = '\n';
- static final char RETURN = '\r';
- static final char TAB = '\t';
- static final char ESCAPE = '\\';
- static final char WILDCARD = '.';
- static final char RANGE = '-';
- // $FF: renamed from: OR char
- static final char field_0 = '|';
- static final char NOT = '^';
- static final char ZERO_OR_ONE = '?';
- static final char ZERO_OR_MORE = '*';
- static final char ONE_OR_MORE = '+';
- static final char LBRACE = '{';
- static final char RBRACE = '}';
- static final char LPAREN = '(';
- static final char RPAREN = ')';
- static final char LBRACKET = '[';
- static final char RBRACKET = ']';
- static final char MINUS = '-';
- static final char REFERENCE = '&';
-
- public static PatternTerm parse(String var0) {
- return parse((PatternInput)(new CharPatternInput(var0)));
- }
-
- public static PatternTerm parse(PatternInput var0) {
- if (!var0.hasNext()) {
- return null;
- } else {
- CharPatternTerm var1 = parseExpr(var0);
- if (var0.hasNext()) {
- throw new RuntimeException("invalid pattern: unused input remaining");
- } else {
- return var1;
- }
- }
- }
-
- protected static CharPatternTerm parseExpr(PatternInput var0) {
- CharPatternTerm var1 = null;
- CharPatternTerm var2 = parseBranch(var0);
- if (var2 != null && peekIs(var0, '|')) {
- if (var1 == null) {
- var1 = new CharPatternTerm(1);
- }
-
- var1.addSubTerm(var2);
-
- while(peekIs(var0, '|')) {
- var0.next();
- var2 = parseBranch(var0);
- if (var2 == null) {
- throw new RuntimeException("invalid pattern: empty branch");
- }
-
- var1.addSubTerm(var2);
- }
- } else {
- var1 = var2;
- }
-
- return var1;
- }
-
- protected static CharPatternTerm parseBranch(PatternInput var0) {
- CharPatternTerm var1 = null;
- CharPatternTerm var2 = parsePiece(var0);
- if (var2 != null && var0.hasNext() && !peekIs(var0, '|') && !peekIs(var0, ')')) {
- if (var1 == null) {
- var1 = new CharPatternTerm(0);
- }
-
- var1.addSubTerm(var2);
-
- while(var0.hasNext() && !peekIs(var0, '|') && !peekIs(var0, ')')) {
- var2 = parsePiece(var0);
- var1.addSubTerm(var2);
- }
- } else {
- var1 = var2;
- }
-
- return var1;
- }
-
- protected static CharPatternTerm parsePiece(PatternInput var0) {
- CharPatternTerm var1 = parseAtom(var0);
- if (var1 == null) {
- throw new RuntimeException("invalid pattern: empty atom");
- } else {
- if (var0.hasNext()) {
- String var2 = var0.peekNext();
- char var3 = var2.charAt(0);
- if (var3 == '?') {
- var1.setOccurrence(0, 1);
- var0.next();
- } else if (var3 == '*') {
- var1.setOccurrence(0, Integer.MAX_VALUE);
- var0.next();
- } else if (var3 == '+') {
- var1.setOccurrence(1, Integer.MAX_VALUE);
- var0.next();
- } else if (var3 == '{') {
- var0.next();
- int var4 = parseInt(var0);
- if (!peekIs(var0, ',')) {
- throw new RuntimeException("invalid qualifier: no comma");
- }
-
- var0.next();
- int var5;
- if (peekIs(var0, '}')) {
- var5 = Integer.MAX_VALUE;
- } else {
- var5 = parseInt(var0);
- }
-
- if (var5 > var4 || var4 < 0) {
- throw new RuntimeException("invalid qualifier: bad range");
- }
-
- if (!peekIs(var0, '}')) {
- throw new RuntimeException("invalid qualifier: missing closure");
- }
-
- var0.next();
- }
- }
-
- return var1;
- }
- }
-
- protected static CharPatternTerm parseAtom(PatternInput var0) {
- if (peekIs(var0, '(')) {
- var0.next();
- CharPatternTerm var6 = parseExpr(var0);
- if (!peekIs(var0, ')')) {
- throw new RuntimeException("invalid atom: missing closure");
- } else {
- var0.next();
- return var6;
- }
- } else if (peekIs(var0, '[')) {
- return parseCharExpr(var0);
- } else if (!peekIs(var0, '&')) {
- if (!var0.hasNext()) {
- throw new RuntimeException("invalid atom: empty");
- } else {
- CharPatternTerm var5 = new CharPatternTerm(2);
- parseChar(var0, var5);
- return var5;
- }
- } else {
- var0.next();
- if (!peekIs(var0, '#')) {
- throw new RuntimeException("invalid character reference: missing #");
- } else {
- var0.next();
- int var1 = 0;
- byte var2 = 10;
- if (peekIs(var0, 'x')) {
- var0.next();
- var2 = 16;
- }
-
- while(var0.hasNext() && !peekIs(var0, ';')) {
- String var3 = var0.next();
- char var4 = var3.charAt(0);
- var1 = var1 * var2 + Character.digit(var4, var2);
- }
-
- if (!peekIs(var0, ';')) {
- throw new RuntimeException("invalid character reference: missing ;");
- } else {
- var0.next();
- CharPatternTerm var7 = new CharPatternTerm(2);
- var7.addChar((char)var1);
- return var7;
- }
- }
- }
- }
-
- protected static CharPatternTerm parseCharExpr(PatternInput var0) {
- var0.next();
- boolean var1 = peekIs(var0, '^');
- if (var1) {
- var0.next();
- }
-
- int var2 = var1 ? 3 : 2;
- CharPatternTerm var3 = new CharPatternTerm(var2);
- if (peekIs(var0, '[')) {
- CharPatternTerm var4 = parseCharExpr(var0);
- var3.addSubTerm(var4);
- if (peekIs(var0, '-')) {
- var0.next();
- CharPatternTerm var5 = parseCharExpr(var0);
- var3.addNegativeTerm(var5);
- }
-
- if (!peekIs(var0, ']')) {
- throw new RuntimeException("invalid subtraction expr: missing closure");
- }
- } else {
- do {
- char var6 = 0;
- if (peekIs(var0, '-')) {
- var0.next();
- if (peekIs(var0, '[')) {
- var3.addNegativeTerm(parseCharExpr(var0));
- if (!peekIs(var0, ']')) {
- throw new RuntimeException("invalid expr: expected ']'");
- }
- } else {
- var3.addChar('-');
- var6 = '-';
- }
- } else {
- var6 = parseChar(var0, var3);
- }
-
- if (var6 != 0 && peekIs(var0, '-')) {
- var0.next();
- char var7 = parseChar(var0, var3);
- if (var7 == 0 || var6 > var7) {
- throw new RuntimeException("invalid character range");
- }
-
- var3.combineRanges();
- }
- } while(!peekIs(var0, ']'));
- }
-
- var0.next();
- return var3;
- }
-
- protected static void parseCharRange(PatternInput var0, CharPatternTerm var1) {
- parseChar(var0, var1);
- }
-
- protected static char parseChar(PatternInput var0, CharPatternTerm var1) {
- String var2 = var0.next();
- char var3 = var2.charAt(0);
- if (var3 == '\\' && var0.hasNext()) {
- var2 = var0.next();
- var3 = var2.charAt(0);
- switch (var3) {
- case '(':
- case ')':
- case '*':
- case '+':
- case '-':
- case '.':
- case '?':
- case '[':
- case '\\':
- case ']':
- case '^':
- case '{':
- case '|':
- case '}':
- var1.addChar(var3);
- break;
- case 'P':
- case 'p':
- var1.addSubTerm(createUnicodeBlockTerm(var3, var0));
- return '\u0000';
- case 'n':
- var1.addChar('\n');
- break;
- case 'r':
- var1.addChar('\r');
- break;
- case 't':
- var1.addChar('\t');
- break;
- default:
- var1.addSubTerm(createXMLTerm(var3));
- return '\u0000';
- }
-
- return var3;
- } else if (var3 == '.') {
- CharPatternTerm var4 = new CharPatternTerm(3);
- var4.addChar('\n');
- var4.addChar('\r');
- var1.addSubTerm(var4);
- return '\u0000';
- } else {
- var1.addChar(var3);
- return var3;
- }
- }
-
- protected static CharPatternTerm createXMLTerm(char var0) {
- Object var1 = null;
- CharPatternTerm var4;
- if (var0 != 's' && var0 != 'i' && var0 != 'c' && var0 != 'd' && var0 != 'W') {
- var4 = new CharPatternTerm(3);
- } else {
- var4 = new CharPatternTerm(2);
- }
-
- var0 = Character.toLowerCase(var0);
- if (var0 == 's') {
- var4.addChar(' ');
- var4.addChar('\n');
- var4.addChar('\r');
- var4.addChar('\t');
- } else if (var0 == 'i') {
- CharPatternTerm var2 = createUnicodePropertyTerm("L");
- var4.addSubTerm(var2);
- var2 = createUnicodePropertyTerm("Nl");
- var4.addSubTerm(var2);
- var4.addChar(':');
- var4.addChar('_');
- } else if (var0 == 'c') {
- CharPatternTerm var6 = createUnicodePropertyTerm("L");
- var4.addSubTerm(var6);
- var6 = createUnicodePropertyTerm("Nl");
- var4.addSubTerm(var6);
- var6 = createUnicodePropertyTerm("Nd");
- var4.addSubTerm(var6);
- var6 = createUnicodePropertyTerm("Mc");
- var4.addSubTerm(var6);
- var6 = createUnicodePropertyTerm("Me");
- var4.addSubTerm(var6);
- var6 = createUnicodePropertyTerm("Mn");
- var4.addSubTerm(var6);
- var6 = createUnicodePropertyTerm("Lm");
- var4.addSubTerm(var6);
- var4.addSubTerm(var6);
- var4.addChar(':');
- var4.addChar('_');
- var4.addChar('-');
- var4.addChar('.');
- } else if (var0 == 'd') {
- CharPatternTerm var13 = createUnicodePropertyTerm("Nd");
- var4.addSubTerm(var13);
- } else {
- if (var0 != 'w') {
- throw new RuntimeException(String.valueOf(String.valueOf("unrecognized escape char: \"").concat(String.valueOf(var0))).concat(String.valueOf("\"")));
- }
-
- CharPatternTerm var14 = createUnicodePropertyTerm("P");
- var4.addSubTerm(var14);
- var14 = createUnicodePropertyTerm("S");
- var4.addSubTerm(var14);
- var14 = createUnicodePropertyTerm("C");
- var4.addSubTerm(var14);
- }
-
- Debug.assert(var4 != null);
- return var4;
- }
-
- protected static CharPatternTerm createUnicodeBlockTerm(char var0, PatternInput var1) {
- if (!peekIs(var1, '{')) {
- throw new RuntimeException(String.valueOf(String.valueOf("expected \"{\" after \"\\").concat(String.valueOf(var0))).concat(String.valueOf("\"")));
- } else {
- var1.next();
- if (!peekIs(var1, 'I')) {
- throw new RuntimeException("block escape must begin with \"Is\"");
- } else {
- var1.next();
- if (!peekIs(var1, 's')) {
- throw new RuntimeException("block escape must begin with \"Is\"");
- } else {
- var1.next();
-
- String var2;
- for(var2 = ""; !peekIs(var1, '}'); var2 = String.valueOf(var2).concat(String.valueOf(var1.next()))) {
- }
-
- if (peekIs(var1, '}')) {
- var1.next();
- CharPatternTerm var3 = new CharPatternTerm(var0 == 'p' ? 2 : 3);
- var3.setBlock(var2);
- return var3;
- } else {
- throw new RuntimeException("block escape must end with closing brace");
- }
- }
- }
- }
- }
-
- protected static CharPatternTerm createUnicodePropertyTerm(String var0) {
- CharPatternTerm var1 = new CharPatternTerm(2);
- var1.setCategory(var0);
- return var1;
- }
-
- protected static int parseInt(PatternInput var0) {
- int var1 = 0;
-
- while(var0.hasNext()) {
- char var2 = var0.peekNext().charAt(0);
- if (var2 >= '0' && var2 <= '9') {
- var0.next();
- var1 = 10 * var1 + var2 - 48;
- }
- }
-
- return var1;
- }
-
- static boolean peekIs(PatternInput var0, char var1) {
- return var0.hasNext() && var0.peekNext().charAt(0) == var1;
- }
-
- static void test(String var0, String[] var1) {
- Pattern var2 = new Pattern(var0);
- PatternTerm var4 = parse(var0);
- System.out.println(String.valueOf(String.valueOf(String.valueOf(String.valueOf(String.valueOf("pattern='").concat(String.valueOf(var0))).concat(String.valueOf("' parsed = '"))).concat(String.valueOf(var4 == null ? "" : var4.toString()))).concat(String.valueOf("'"))).concat(String.valueOf(var2.isDeterministic() ? "" : " non-deterministic")));
-
- for(int var5 = 0; var5 < var1.length; ++var5) {
- boolean var3 = var2.matches(var1[var5]);
- System.out.println(String.valueOf(String.valueOf(String.valueOf(" text='").concat(String.valueOf(var1[var5]))).concat(String.valueOf("' => "))).concat(String.valueOf(var3)));
- }
-
- }
-
- public static void main(String[] var0) {
- test("a|b", new String[]{"ab", "aa", "b"});
- test("ab|ac", new String[]{"ab", "b", "a", "abc", "bc"});
- test("abc", new String[]{"x", "ab", "abc", "abcd"});
- test("a*b", new String[]{"ab", "aa", "b"});
- test("a+b", new String[]{"ab", "aa", "b"});
- test("a+b?", new String[]{"ab", "aa", "b"});
- test("a.b", new String[]{"ab", "aab", "aub"});
- test("a.*b", new String[]{"ab", "baab", "aub", "aaaab", "abacab"});
- test("[0-3]*", new String[]{"121", "a12", "12q", "12301230", "0-2"});
- test("[0-9a-fA-F]+", new String[]{"121", "E12", "12q", "12301230", ""});
- test("0x[0-9a-fA-F]+", new String[]{"0x121", "0b12", "0x12q", "x12301230", ""});
- test("((cat)*(dog)*)*", new String[]{"catcat", "dogdogcat", "catat", "dat", "dogcatdogcatcatdog"});
- test("0|((\\+|\\-)?[1-9][0-9]*)", new String[]{"+0", "012", "-43", "314", "abacab", "+400"});
- test("", new String[]{"ab", ""});
- test("a?.", new String[]{"ab", "", "a", "bb"});
- test("a?.?", new String[]{"ab", "", "a", "bb"});
- test("ab|ac", new String[]{"ab", "b", "a", "abc", "bc"});
- test("([a-z])+", new String[]{"ab", "b", "a", "abc", "bc"});
- test("([a-z-[aeiou]])+", new String[]{"ab", "b", "a", "abc", "bc"});
- test("(AB)+", new String[]{"AB", "ab", "ABA", "ABC", "ABAB"});
- test("a[^a-c]*b", new String[]{"ab", "b", "acb", "awb", "aqweb"});
- test("\\i\\c*", new String[]{"ab", "", "_a1", "1z", "abc:X-1"});
- test("[\\i-[:]][\\c-[:]]*", new String[]{"ab", "", "_a1", "1z", "abc:X-1"});
- test("\\W\\s\\d", new String[]{"; 2", ".\t1", "...", "=2"});
- test("\\p{IsBasicLatin}+", new String[]{"ab", "q®w"});
- test("\\p{IsGreek}\\d", new String[]{"Õ≥5", "Õ¥", "a1"});
- System.out.println("done");
- }
- }
-