home *** CD-ROM | disk | FTP | other *** search
- //
- // MiscStringRegex.m
- // Regular expression matching and replacement routines using regexpr.c
- //
- // Written by Carl Lindberg (c) 1994 by Carl Lindberg.
- // Version 1.95 All rights reserved.
- // This notice may not be removed from this source code.
- //
- // This object is included in the MiscKit by permission from the author
- // and its use is governed by the MiscKit license, found in the file
- // "LICENSE.rtf" in the MiscKit distribution. Please refer to that file
- // for a list of all applicable permissions and restrictions.
- //
-
- /* I decided to keep with MiscStringPatterns' -grep method, except to add
- * an occurrenceNum: parameter. The old calls to this method should work
- * exactly as they used to. I did not include the -grepString methods, as
- * I'm not sure how useful they would be, and there are a fair number of
- * methods here already. If anyone thinks they were indeed useful, I can add
- * them in very easily.
- * On the other hand, MiscStringPatterns' -replace methods have been
- * rearranged. To be more parallel with the rest of the MiscString class,
- * I made the global replace its own method, and added an occurrenceNum:
- * parameter to the basic replace method.
- * There are other possible methods here: -indexOfRegex and -rindexOfRegex
- * methods to go along with -spotOfRegex, and maybe a method to simply return
- * a matched portion in a new MiscString. I'm going to let these go for the
- * time being; if people think they are useful I'll add them in then.
- */
-
- #import <misckit/MiscString.h>
-
@implementation MiscString(Regex)

// ---------------------------------------------------------------------------
// Private helpers (file-local C functions).
// ---------------------------------------------------------------------------

// Prepares and compiles `regex` into `pat`.
//   fastmap, translate: caller-supplied 256-byte tables. The compiled pattern
//       keeps pointers to them, so they must stay alive for as long as `pat`
//       is used.
//   sense: if NO, 'A'..'Z' are mapped onto 'a'..'z' in the translate table.
// Returns YES on success. On failure, anything the compiler allocated in
// pat->buffer is freed and NO is returned; `pat` must not be used afterwards.
static BOOL MiscRegexCompile(const char *regex, BOOL sense,
                             struct re_pattern_buffer *pat,
                             char *fastmap, char *translate)
{
    int i;

    memset(pat, 0, sizeof(*pat));
    for (i = 0; i < 256; i++) translate[i] = i;
    if (!sense) {
        for (i = 'A'; i <= 'Z'; i++) translate[i] = i - 'A' + 'a';
    }
    pat->translate = translate;
    pat->fastmap = fastmap;

    // re_compile_pattern returns a non-NULL error string on failure.
    if (re_compile_pattern((char *)regex, strlen(regex), pat)) {
        if (pat->buffer) free(pat->buffer);
        pat->buffer = NULL;
        return NO;
    }
    return YES;
}

// YES if `target` is non-nil and responds to -setStringValue:.
static BOOL MiscRegexAccepts(id target)
{
    if (target && [target respondsTo:@selector(setStringValue:)]) return YES;
    return NO;
}

// Hands the contents of `piece` (or "" if piece is nil) to `target` via
// -setStringValue:, then frees `piece`.
static void MiscRegexDeliver(id target, id piece)
{
    if (piece) [target setStringValue:[piece stringValue]];
    else [target setStringValue:""];
    [piece free];
}


// ---------------------------------------------------------------------------
// Primary methods.
// ---------------------------------------------------------------------------

// Searches the receiver for occurrence number `n` (0-based) of `regex`, or
// for the last occurrence if n == MISC_STRING_LAST. On a match, the text
// before, inside and after the match is passed to b, m and a respectively
// (each only if non-nil and it responds to -setStringValue:). If there is
// no match, those objects are set to "".
// Returns 1 if found, 0 if not found, and -1 on error (NULL or bad regex).
- (int)grep:(const char *)regex occurrenceNum:(int)n caseSensitive:(BOOL)sense before:(id)b middle:(id)m after:(id)a
{
    int spot, len = 0;

    if (!regex) return -1;
    if (n == MISC_STRING_LAST)
        spot = [self rspotOfRegex:regex occurrenceNum:0 caseSensitive:sense length:&len];
    else
        spot = [self spotOfRegex:regex occurrenceNum:n caseSensitive:sense length:&len];

    // -setStringValue: was chosen over -takeStringValueFrom: here.
    if ((spot >= 0) && (len > 0)) {
        if (MiscRegexAccepts(b))
            MiscRegexDeliver(b, [self midFrom:0 to:spot-1]);
        if (MiscRegexAccepts(a))
            MiscRegexDeliver(a, [self midFrom:spot+len to:length-1]);
        if (MiscRegexAccepts(m))
            MiscRegexDeliver(m, [self midFrom:spot length:len]);
    }
    else {  // Nothing matched: clear the outputs rather than leave them stale.
        if (MiscRegexAccepts(a)) [a setStringValue:""];
        if (MiscRegexAccepts(b)) [b setStringValue:""];
        if (MiscRegexAccepts(m)) [m setStringValue:""];
    }

    // The spot methods return -2 on a problem, -1 if not found, and >= 0 if
    // found.  Add one, then clamp everything above one down to one.
    if (++spot > 1) spot = 1;
    return spot;
}


// Counts the non-overlapping matches of `regex` in the receiver, scanning
// left to right. Counting stops at the first match that is not longer than
// zero characters. Returns -1 if regex is NULL or fails to compile.
- (int)numOfRegex:(const char *)regex caseSensitive:(BOOL)sense
{
    struct re_pattern_buffer pat;   // or regexp_t (which is a pointer)
    char fm[256], tr[256];
    int count = 0, from = 0, len, pos;

    if (!regex) return -1;
    if (!MiscRegexCompile(regex, sense, &pat, fm, tr)) return -1;

    while ((pos = re_search_pattern(&pat, buffer, length, from, length-from, 0)) >= 0) {
        len = re_match_pattern(&pat, buffer, length, pos, 0);
        if (len <= 0) break;        // empty match or matcher failure: stop
        from = pos + len;
        count++;
    }

    if (pat.buffer) free(pat.buffer);
    return count;
}


// Replaces every non-overlapping match of `regex` in the receiver with
// `aString`. The result is assembled in a temporary string and then copied
// back into the receiver with -takeStringValueFrom:.
// Returns the number of replacements made, or -1 if regex is NULL or fails
// to compile (in which case the receiver is left untouched).
- (int)replaceEveryOccurrenceOfRegex:(const char *)regex with:(const char *)aString caseSensitive:(BOOL)sense
{
    struct re_pattern_buffer pat;
    char fm[256], tr[256];
    int count = 0, from = 0, len, pos;
    id result;

    if (!regex) return -1;
    if (!MiscRegexCompile(regex, sense, &pat, fm, tr)) return -1;

    result = [[[self class] alloc] allocateBuffer:length];
    while ((pos = re_search_pattern(&pat, buffer, length, from, length-from, 0)) >= 0) {
        len = re_match_pattern(&pat, buffer, length, pos, 0);
        // Copy the unmatched text between the previous match and this one.
        [result cat:buffer+from n:pos-from];
        if (len <= 0) {
            // Empty match or matcher failure: stop here; the remainder is
            // copied verbatim below.
            from = pos;
            break;
        }
        [result cat:aString];
        count++;
        from = pos + len;
    }
    [result cat:buffer+from n:length-from];     // tail after the last match
    [self takeStringValueFrom:result];
    [result free];
    if (pat.buffer) free(pat.buffer);
    return count;
}


// Finds occurrence number `n` (0-based, counted left to right over
// non-overlapping matches) of `regex` in the receiver.
// Returns the index of the start of the match, -1 if there is no such
// occurrence (or n < 0), and -2 if regex is NULL or fails to compile.
// If matchlen is non-NULL, it receives the length of the match, or 0
// whenever the return value is negative.
- (int)spotOfRegex:(const char *)regex occurrenceNum:(int)n caseSensitive:(BOOL)sense length:(int *)matchlen
{
    struct re_pattern_buffer pat;   // or regexp_t (which is a pointer)
    char fm[256], tr[256];
    int seen = 0, from = 0, len = 0, pos = -1;

    if (matchlen) *matchlen = 0;    // default for every failure path
    if (!regex) return -2;
    if (n < 0) return -1;
    if (!MiscRegexCompile(regex, sense, &pat, fm, tr)) return -2;

    while ((seen <= n) && ((pos = re_search_pattern(&pat, buffer, length, from, length-from, 0)) >= 0)) {
        len = re_match_pattern(&pat, buffer, length, pos, 0);
        if (len <= 0) {             // empty match or matcher failure
            pos = -1;
            break;
        }
        from = pos + len;
        seen++;
    }
    if (pos < 0) len = 0;
    if (matchlen) *matchlen = len;
    if (pat.buffer) free(pat.buffer);
    return pos;
}

// Like -spotOfRegex:occurrenceNum:caseSensitive:length:, but occurrences are
// numbered from the end of the receiver (n == 0 is the last match). Same
// return values: index if found, -1 if not found, -2 on error.
- (int)rspotOfRegex:(const char *)regex occurrenceNum:(int)n caseSensitive:(BOOL)sense length:(int *)matchlen
{
    int num = [self numOfRegex:regex caseSensitive:sense];

    // A negative count means the regex was NULL or did not compile. Report
    // that as an error here; passing a negative occurrence number on would
    // be answered with "not found" before the pattern is even looked at.
    if (num < 0) {
        if (matchlen) *matchlen = 0;
        return -2;
    }
    return [self spotOfRegex:regex occurrenceNum:num-1-n caseSensitive:sense length:matchlen];
}

// Replaces occurrence number `n` of `regex` with `aString`. The receiver is
// left unchanged if there is no such occurrence or on error. Returns self.
- replaceRegex:(const char *)regex with:(const char *)aString occurrenceNum:(int)n caseSensitive:(BOOL)sense
{
    int spot, len = 0;

    spot = [self spotOfRegex:regex occurrenceNum:n caseSensitive:sense length:&len];
    if ((spot >= 0) && (len > 0))
        [self replaceFrom:spot length:len with:aString];
    return self;
}

// Returns the length of the match if the first match of `regex` starts at
// the very beginning of the receiver, and 0 otherwise. Errors also yield 0
// (rather than a negative number) so the result can be used like a BOOL in
// an if statement.
- (int)matchesRegex:(const char *)regex caseSensitive:(BOOL)sense
{
    int spot, len = 0;

    if (!regex) return 0;
    spot = [self spotOfRegex:regex occurrenceNum:0 caseSensitive:sense length:&len];
    return (spot == 0) ? len : 0;
}


// ---------------------------------------------------------------------------
// Convenience methods.  Omitted arguments default to: occurrenceNum 0,
// caseSensitive YES, before/middle/after nil, length NULL.
// ---------------------------------------------------------------------------

- (int)grep:(const char *)regex
{
    return [self grep:regex occurrenceNum:0 caseSensitive:YES before:nil middle:nil after:nil];
}

- (int)grep:(const char *)regex caseSensitive:(BOOL)sense
{
    return [self grep:regex occurrenceNum:0 caseSensitive:sense before:nil middle:nil after:nil];
}

- (int)grep:(const char *)regex occurrenceNum:(int)n
{
    return [self grep:regex occurrenceNum:n caseSensitive:YES before:nil middle:nil after:nil];
}

- (int)grep:(const char *)regex occurrenceNum:(int)n caseSensitive:(BOOL)sense
{
    return [self grep:regex occurrenceNum:n caseSensitive:sense before:nil middle:nil after:nil];
}

- (int)grep:(const char *)regex before:(id)b middle:(id)m after:(id)a
{
    return [self grep:regex occurrenceNum:0 caseSensitive:YES before:b middle:m after:a];
}

- (int)grep:(const char *)regex caseSensitive:(BOOL)sense before:(id)b middle:(id)m after:(id)a
{
    return [self grep:regex occurrenceNum:0 caseSensitive:sense before:b middle:m after:a];
}

- (int)grep:(const char *)regex occurrenceNum:(int)n before:(id)b middle:(id)m after:(id)a
{
    return [self grep:regex occurrenceNum:n caseSensitive:YES before:b middle:m after:a];
}

- (int)numOfRegex:(const char *)regex
{
    return [self numOfRegex:regex caseSensitive:YES];
}

- (int)replaceEveryOccurrenceOfRegex:(const char *)regex with:(const char *)aString
{
    return [self replaceEveryOccurrenceOfRegex:regex with:aString caseSensitive:YES];
}

// Replaces every match with the single character aChar.  Returns -1 without
// touching the receiver if aChar is the NUL character.
- (int)replaceEveryOccurrenceOfRegex:(const char *)regex withChar:(char)aChar caseSensitive:(BOOL)sense
{
    char str[2];

    if (!aChar) return -1;  // or should we let this go?
    str[0] = aChar;
    str[1] = 0;
    return [self replaceEveryOccurrenceOfRegex:regex with:str caseSensitive:sense];
}

- (int)replaceEveryOccurrenceOfRegex:(const char *)regex withChar:(char)aChar
{
    return [self replaceEveryOccurrenceOfRegex:regex withChar:aChar caseSensitive:YES];
}

- (int)replaceEveryOccurrenceOfRegex:(const char *)regex withString:(id)sender
{
    return [self replaceEveryOccurrenceOfRegex:regex withString:sender caseSensitive:YES];
}

// Replaces every match with [sender stringValue].  Returns -1 if sender does
// not respond to -stringValue.
- (int)replaceEveryOccurrenceOfRegex:(const char *)regex withString:(id)sender caseSensitive:(BOOL)sense
{
    if (![sender respondsTo:@selector(stringValue)]) return -1;
    return [self replaceEveryOccurrenceOfRegex:regex
                                          with:[sender stringValue]
                                 caseSensitive:sense];
}

- (int)spotOfRegex:(const char *)regex
{
    return [self spotOfRegex:regex occurrenceNum:0 caseSensitive:YES length:NULL];
}

- (int)spotOfRegex:(const char *)regex caseSensitive:(BOOL)sense
{
    return [self spotOfRegex:regex occurrenceNum:0 caseSensitive:sense length:NULL];
}

- (int)spotOfRegex:(const char *)regex occurrenceNum:(int)n
{
    return [self spotOfRegex:regex occurrenceNum:n caseSensitive:YES length:NULL];
}

- (int)spotOfRegex:(const char *)regex occurrenceNum:(int)n caseSensitive:(BOOL)sense
{
    return [self spotOfRegex:regex occurrenceNum:n caseSensitive:sense length:NULL];
}

- (int)spotOfRegex:(const char *)regex length:(int *)matchlen
{
    return [self spotOfRegex:regex occurrenceNum:0 caseSensitive:YES length:matchlen];
}

- (int)spotOfRegex:(const char *)regex caseSensitive:(BOOL)sense length:(int *)matchlen
{
    return [self spotOfRegex:regex occurrenceNum:0 caseSensitive:sense length:matchlen];
}

- (int)spotOfRegex:(const char *)regex occurrenceNum:(int)n length:(int *)matchlen
{
    return [self spotOfRegex:regex occurrenceNum:n caseSensitive:YES length:matchlen];
}


- (int)rspotOfRegex:(const char *)regex
{
    return [self rspotOfRegex:regex occurrenceNum:0 caseSensitive:YES length:NULL];
}

- (int)rspotOfRegex:(const char *)regex caseSensitive:(BOOL)sense
{
    return [self rspotOfRegex:regex occurrenceNum:0 caseSensitive:sense length:NULL];
}

- (int)rspotOfRegex:(const char *)regex occurrenceNum:(int)n
{
    return [self rspotOfRegex:regex occurrenceNum:n caseSensitive:YES length:NULL];
}

- (int)rspotOfRegex:(const char *)regex occurrenceNum:(int)n caseSensitive:(BOOL)sense
{
    return [self rspotOfRegex:regex occurrenceNum:n caseSensitive:sense length:NULL];
}

- (int)rspotOfRegex:(const char *)regex length:(int *)matchlen
{
    return [self rspotOfRegex:regex occurrenceNum:0 caseSensitive:YES length:matchlen];
}

- (int)rspotOfRegex:(const char *)regex caseSensitive:(BOOL)sense length:(int *)matchlen
{
    return [self rspotOfRegex:regex occurrenceNum:0 caseSensitive:sense length:matchlen];
}

- (int)rspotOfRegex:(const char *)regex occurrenceNum:(int)n length:(int *)matchlen
{
    return [self rspotOfRegex:regex occurrenceNum:n caseSensitive:YES length:matchlen];
}


- replaceRegex:(const char *)regex with:(const char *)aString
{
    return [self replaceRegex:regex with:aString occurrenceNum:0 caseSensitive:YES];
}

- replaceRegex:(const char *)regex with:(const char *)aString caseSensitive:(BOOL)sense
{
    return [self replaceRegex:regex with:aString occurrenceNum:0 caseSensitive:sense];
}

- replaceRegex:(const char *)regex with:(const char *)aString occurrenceNum:(int)n
{
    return [self replaceRegex:regex with:aString occurrenceNum:n caseSensitive:YES];
}

- replaceRegex:(const char *)regex withChar:(char)aChar
{
    return [self replaceRegex:regex withChar:aChar occurrenceNum:0 caseSensitive:YES];
}

- replaceRegex:(const char *)regex withChar:(char)aChar caseSensitive:(BOOL)sense
{
    return [self replaceRegex:regex withChar:aChar occurrenceNum:0 caseSensitive:sense];
}

- replaceRegex:(const char *)regex withChar:(char)aChar occurrenceNum:(int)n
{
    return [self replaceRegex:regex withChar:aChar occurrenceNum:n caseSensitive:YES];
}

// Replaces occurrence `n` with the single character aChar.  Returns nil
// without touching the receiver if aChar is the NUL character.
- replaceRegex:(const char *)regex withChar:(char)aChar occurrenceNum:(int)n caseSensitive:(BOOL)sense
{
    char str[2];

    if (!aChar) return nil; // or self? or check for this at all?
    str[0] = aChar;
    str[1] = 0;
    return [self replaceRegex:regex with:str occurrenceNum:n caseSensitive:sense];
}

- replaceRegex:(const char *)regex withString:(id)sender
{
    return [self replaceRegex:regex withString:sender occurrenceNum:0 caseSensitive:YES];
}

- replaceRegex:(const char *)regex withString:(id)sender caseSensitive:(BOOL)sense
{
    return [self replaceRegex:regex withString:sender occurrenceNum:0 caseSensitive:sense];
}

- replaceRegex:(const char *)regex withString:(id)sender occurrenceNum:(int)n
{
    return [self replaceRegex:regex withString:sender occurrenceNum:n caseSensitive:YES];
}

// Replaces occurrence `n` with [sender stringValue].  Returns self, leaving
// the receiver unchanged, if sender does not respond to -stringValue.
- replaceRegex:(const char *)regex withString:(id)sender occurrenceNum:(int)n caseSensitive:(BOOL)sense
{
    if (![sender respondsTo:@selector(stringValue)]) return self;  // hmmm
    return [self replaceRegex:regex with:[sender stringValue]
                occurrenceNum:n caseSensitive:sense];
}

- (int)matchesRegex:(const char *)regex
{
    return [self matchesRegex:regex caseSensitive:YES];
}

@end