home *** CD-ROM | disk | FTP | other *** search
- //
- // MiscStringPatterns.m -- Pattern matching and replacement routines
- // Written by Steve Hayman (c) 1994 by Steve Hayman.
- // Version 1.95 All rights reserved.
- // This notice may not be removed from this source code.
- //
- // This object is included in the MiscKit by permission from the author
- // and its use is governed by the MiscKit license, found in the file
- // "LICENSE.rtf" in the MiscKit distribution. Please refer to that file
- // for a list of all applicable permissions and restrictions.
- //
-
- #import <misckit/MiscString.h>
-
- @implementation MiscString(PatternMatching)
- /*
- * Match strings against regular expressions, using re_compile
- * Steve Hayman
- * November 23, 1993
- */
- /*
- * Match a specified pattern. Returns 1 or 0 depending on whether the
- * pattern is found in the indicated string. Returns -1 if bogus regexp.
- * Will also optionally fill in strings with the portion of the destination
- * string before the match, the portion that matches, and the portion
- * after the match.
- */
- #import <regex.h>
-
- #ifdef DONT_COMPILE // obsolete method
/*
 * Search the receiver's text for the first match of the regular
 * expression `pattern`.
 *
 *   pattern   regular expression source handed to re_compile()
 *   caseSens  YES for a case-sensitive match, NO to fold case
 *   bstring   if non-nil, receives the text preceding the match
 *   mstring   if non-nil, receives the matched text
 *   astring   if non-nil, receives the text following the match
 *
 * Returns 1 when a match was found, 0 when there was none (or the
 * receiver has no text), and -1 when the expression is bogus.  The
 * output strings are only written when a match was found.
 */
- (int) grep:(const char *)pattern caseSensitive:(BOOL)caseSens before:bstring middle:mstring after:astring
{
    char *text = (char *)[self stringValue];
    struct regex *compiled;
    char *matchStart, *matchEnd;
    MiscString *piece;
    int status;

    // Nothing can match when the receiver has no text at all.
    if ( text == NULL )
        return 0;

    // re_compile() wants a "fold case" flag, which is the inverse of
    // "case sensitive".
    compiled = re_compile((char *)pattern, !caseSens);
    if ( compiled == NULL )
        return -1;                  // bogus regular expression

    status = re_match( text, compiled );
    if ( status == 0 || status == -1 ) {
        // 0: no match; -1: bogus regular expression.  Either way the
        // compiled expression is no longer needed.
        free(compiled);
        return status;
    }

    // Matched: the regex structure records where the match begins and
    // ends inside `text`.
    matchStart = compiled->start;
    matchEnd = compiled->end;

    // Hand back whichever pieces the caller asked for.  midFrom:to:
    // presumably takes inclusive offsets (hence the "- 1") and returns
    // a new string that we own -- TODO confirm against MiscString.

    if ( bstring ) {
        // everything in front of the match
        piece = [self midFrom:0 to:matchStart - text - 1];
        [bstring takeStringValue:piece];
        [piece free];
    }

    if ( mstring ) {
        // the matched text itself
        piece = [self midFrom:(matchStart - text) to:(matchEnd - text) - 1];
        [mstring takeStringValue:piece];
        [piece free];
    }

    if ( astring ) {
        // everything behind the match
        piece = [self midFrom: matchEnd - text to: [self length] - 1];
        [astring takeStringValue: piece];
        [piece free];
    }

    free(compiled);
    return 1;
}
- #endif
-
- /*
- * Variants on the above.
- */
-
- #ifdef DONT_COMPILE // obsolete method
/*
 * Match `pattern` against the receiver without capturing the text
 * before, inside or after the match.  Returns whatever
 * -grep:caseSensitive:before:middle:after: returns.
 */
- (int) grep:(const char *)pattern caseSensitive:(BOOL)caseSens
{
    int result;

    result = [self grep:pattern
          caseSensitive:caseSens
                 before:nil
                 middle:nil
                  after:nil];
    return result;
}
- #endif
-
- #ifdef DONT_COMPILE // obsolete method
/*
 * Case-sensitive match of `pattern` against the receiver.  Returns
 * whatever -grep:caseSensitive: returns.
 */
- (int) grep:(const char *)pattern
{
    int result = [self grep:pattern caseSensitive:YES];

    return result;
}
- #endif
-
/*
 * Like -grep:caseSensitive:before:middle:after:, except that the
 * pattern is supplied as an object; its -stringValue is used as the
 * regular expression.
 */
- (int) grepString:pattern caseSensitive:(BOOL)caseSens before:bstring middle:mstring after:astring
{
    const char *expression = [pattern stringValue];

    return [self grep:expression
        caseSensitive:caseSens
               before:bstring
               middle:mstring
                after:astring];
}
-
/*
 * Match the pattern object against the receiver without capturing the
 * text before, inside or after the match.
 */
- (int) grepString:pattern caseSensitive:(BOOL)caseSens
{
    int result;

    result = [self grepString:pattern
                caseSensitive:caseSens
                       before:nil
                       middle:nil
                        after:nil];
    return result;
}
-
/*
 * Case-sensitive match of the pattern object against the receiver.
 */
- (int) grepString:pattern
{
    int result = [self grepString:pattern caseSensitive:YES];

    return result;
}
-
-
/*
 * Replace text in the receiver that matches the regular expression
 * `pattern`.
 *
 *   pattern      regular expression, passed to
 *                -grep:caseSensitive:before:middle:after:
 *   caseSens     passed through to grep: unchanged
 *   glob         NO: replace the first match only.
 *                YES: keep searching the text that follows each match
 *                and replace every further match found there.
 *   replacement  text substituted for each match; when NULL the matched
 *                text is simply dropped
 *
 * Returns the number of replacements made, or the negative value
 * reported by grep: (-1 means a bogus regular expression).  The
 * receiver is left untouched when no replacement was made.
 *
 * A match that consumes no text (patterns such as "x*", "^" or "$")
 * is replaced once and then ends the search; without that stop a
 * global replace would find the same empty match forever.
 */
- (int)replacePattern:(const char *)pattern caseSensitive:(BOOL)caseSens globally:(BOOL)glob with:(const char *)replacement
{
    id before = [[MiscString alloc] init];
    id after = [[MiscString alloc] init];
    id middle = [[MiscString alloc] init];

    id newString = [[MiscString alloc] init];       // the rebuilt text
    id newReplacement = [[MiscString alloc] init];  // scratch for FUTURE_FEATURE
    id grepMe = self;                               // text still to be searched

    int r;
    int replacements = 0;
    int remaining;

    // Do at least one replacement; if "glob" is TRUE, keep going
    // until we can't do any more.

    do {
        // Remember how much text we are about to search.  This must be
        // read before grep: runs, because from the second pass on
        // grepMe and after are the same object.
        remaining = [(MiscString *)grepMe length];

        r = [grepMe grep:pattern caseSensitive:caseSens
                  before:before middle:middle after:after];

        if ( r <= 0 )
            break;      // no match - or no more matches
                        // NOTE(review): the code below relies on grep:
                        // leaving `after` untouched in this case, as the
                        // obsolete implementation does -- confirm for the
                        // current one.

        if ( [before stringValue] )
            [newString concatenate:before];

        if ( replacement ) {
#ifdef FUTURE_FEATURE
            /*
             * ed has this notion of "&" on the right hand side of
             * a substitution meaning "interpolate the text that was
             * matched", i.e. "s/foo/&bar/" produces "foobar".
             *
             * In addition ed lets you mark out sub-expressions
             * with \(\), and you can use \1, \2 ... to refer to
             * the corresponding matched text.
             *     s/\(A*\)\(B*\)/\2\1/
             * which would turn AAAAABB into BBAAAAA
             * This is ALMOST really easy to do here.
             * To get "&" working it is ALMOST a matter of doing
             *
             *     [newReplacement setStringValue:replacement];
             *     [newReplacement replacePattern:"&"
             *         caseSensitive:NO globally:YES
             *         withString:middle];
             *     [newString concatenate:newReplacement];
             *
             * which would be a neat use of recursion.
             * So why haven't I done this?  Well, you should also
             * support "\&" meaning "a literal '&'" on the right
             * hand side, and, well, I couldn't think of a quick
             * two-line way to sneak that in.  Also you would get
             * in trouble if "middle" contained a "&".
             *
             * Also, the regex structure contains
             *     char *braslist[NBRA];
             *     char *braelist[NBRA];
             *
             * These are pointers to the beginning and end of
             * parenthesized sub-expressions matched in the
             * input text.  You could use these to implement
             * replacements of \1, \2 ... \9, but, again, you would
             * need to be careful that you weren't matching "\\1".
             *
             * Maybe next time.
             *     steve
             */

            // "&" in the replacement string stands for the text
            // that was matched - just like "ed"
            [newReplacement setStringValue:replacement];
            [newReplacement replacePattern:"&" caseSensitive:NO globally:YES
                                withString:middle];

            [newString concatenate:newReplacement];
#else
            // do the simple-minded substitution in lieu of fanciness above
            [newString cat:replacement];
#endif
        }
        replacements ++;

        // Guarantee progress: if the text left over is not shorter than
        // the text we just searched, the match consumed nothing and
        // searching again would find exactly the same match.  Stop here
        // instead of looping forever.
        if ( [(MiscString *)after length] >= remaining )
            break;

        // next time around we match on the remainder of the string
        grepMe = after;
    } while ( glob );

    // Whatever follows the last match is carried over unchanged.
    if ( [after stringValue] )
        [newString concatenate:after];

    // If any changes were made, copy the new string.
    if ( replacements )
        [self takeStringValue:newString];

    [before free];
    [after free];
    [middle free];
    [newString free];
    [newReplacement free];

    // Return number of replacements made, or -1 if bogus regexp.
    return ( r < 0 ? r : replacements );
}
-
- /*
- * Various other flavours of replacePattern
- */
-
/*
 * Like -replacePattern:caseSensitive:globally:with:, except that the
 * replacement is supplied as an object; its -stringValue is used as
 * the replacement text.
 */
- (int)replacePattern:(const char *)pattern caseSensitive:(BOOL)caseSens globally:(BOOL)glob withString:replacement
{
    const char *replacementText = [replacement stringValue];

    return [self replacePattern:pattern
                  caseSensitive:caseSens
                       globally:glob
                           with:replacementText];
}
-
/*
 * Like -replacePattern:caseSensitive:globally:with:, except that the
 * pattern is supplied as an object; its -stringValue is used as the
 * regular expression.
 */
- (int)replacePatternString:pattern caseSensitive:(BOOL)caseSens globally:(BOOL)glob with:(const char *)replacement
{
    const char *expression = [pattern stringValue];

    return [self replacePattern:expression
                  caseSensitive:caseSens
                       globally:glob
                           with:replacement];
}
/*
 * Like -replacePattern:caseSensitive:globally:with:, except that both
 * the pattern and the replacement are supplied as objects; the
 * -stringValue of each is used.
 */
- (int)replacePatternString:pattern caseSensitive:(BOOL)caseSens globally:(BOOL)glob withString:replacement
{
    const char *expression = [pattern stringValue];
    const char *replacementText = [replacement stringValue];

    return [self replacePattern:expression
                  caseSensitive:caseSens
                       globally:glob
                           with:replacementText];
}
-
-
- @end
-