home *** CD-ROM | disk | FTP | other *** search
- /* Copyright (C) 1993 Marc Stern (internet: stern@mble.philips.be) */
-
- #include "strings.h"
- #include <stdlib.h>
-
-
- /*
- Functions : matchset
- match
- recursexp
- regexp
- */
-
-
-
/***
 *  Function    :  matchset (internal)
 *
 *  Description :  Test if a character matches a set expression, i.e. the
 *                 text found between '[' and ']' in a regular expression
 *                 (match() passes it without the brackets).
 *
 *  Parameters  :  in   char c          character to be matched
 *                 in   char *pattern   NUL-terminated set expression
 *                                      (read only, never modified)
 *
 *  Decisions   :  The following symbols are treated specially:
 *
 *                 \     quote next character
 *                 -     range of values (between its two neighbours,
 *                       given in either order)
 *                 ^     non-inclusion (if first character)
 *
 *                 A '-' that is the first or the last character of the
 *                 set, and a '\' that is the last character, stand for
 *                 themselves.
 *
 *                 ex:   aeiou0-9   match a, e, i, o, u, and 0 thru 9
 *                       ^aeiou0-9  match anything but a, e, i, o, u,
 *                                  and 0 thru 9
 *
 *  Return      :  1 if c belongs to the set (or, for a '^' set, does not
 *                 belong to it), 0 otherwise.  The NUL character never
 *                 matches, so 0 is returned for c == '\0'.
 *
 *  OS/Compiler :  All
 ***/

int matchset( char c, char *pattern )

{
    const char *p = pattern;
    int match_ok  = 1;       /* value to return when c is found in the set */
    int have_prev = 0;       /* a set member has already been read         */
    char prev     = '\0';    /* last member read: lower bound of a range   */

    if ( c == '\0' ) return 0;

    if ( *p == '^' ) { match_ok = 0; p ++; }

    while ( *p != '\0' )
    {
        /* A '-' opens a range only when it has a member on both sides;
           otherwise it is handled below as an ordinary character.  This
           keeps the scan from ever stepping past the terminating NUL. */
        if ( (*p == '-') && have_prev && (p[1] != '\0') )
        {
            char low = prev, high;

            p ++;
            if ( (*p == '\\') && (p[1] != '\0') ) p ++;
            high = *p ++;
            prev = high;     /* the upper bound may start a further range */

            if ( low > high ) { char tmp = low; low = high; high = tmp; }
            if ( (low <= c) && (c <= high) ) return match_ok;
            continue;
        }

        /* A backslash quotes the next character; a trailing backslash
           has nothing to quote and is taken literally. */
        if ( (*p == '\\') && (p[1] != '\0') ) p ++;

        prev = *p ++;
        have_prev = 1;
        if ( prev == c ) return match_ok;
    }

    return (! match_ok);
}
-
-
-
-
/***
 *  Function    :  match (internal)
 *
 *  Description :  Returns the number of characters at the start of a
 *                 string matched by repeating a one-element regular
 *                 expression as often as possible.
 *
 *  Decisions   :  The following symbols are treated specially:
 *
 *                 .     any character
 *                 \     quote next character
 *                 []    set of characters (see matchset)
 *
 *                 Inside a set, "\x" is skipped as a pair while looking
 *                 for the closing ']', so an escaped backslash directly
 *                 before ']' does not hide the bracket.  A backslash that
 *                 is the last character of the pattern is matched
 *                 literally.
 *
 *  Parameters  :  in      char *string    input string to be matched
 *                 in/out  char *pattern   regular expression; only its
 *                                         first element is used
 *
 *  Side-effects:  pattern contents will be destroyed: a NUL is written
 *                 over the '.', over the closing ']' of a set, or over
 *                 the literal character, so that the caller can locate
 *                 the rest of the expression just after that NUL.
 *
 *  Return      :  number of characters matched by the element
 *                 0 if not matched
 *
 *  OS/Compiler :  All (the DOS-only "near" qualifier has been dropped;
 *                 the function is static, so no caller outside this file
 *                 is affected)
 ***/

int matchset( char c, char *pattern );   /* defined earlier in this file */

static int match( const char *string, char *pattern )

{
    char *ptr;
    int length = 0;

    switch ( *pattern )
    {
        case '.' :
            *pattern = '\0';
            length = (int) strlen( string );
            break;

        case '[' :
            /* Find the closing bracket, stepping over quoted characters. */
            for ( ptr = ++pattern; (*ptr != '\0') && (*ptr != ']'); ptr ++ )
                if ( (*ptr == '\\') && (ptr[1] != '\0') ) ptr ++;

            *ptr = '\0';   /* no-op when the set is unterminated */

            /* matchset() returns 0 for the NUL character, which ends the
               loop at the end of string. */
            while ( matchset(*string, pattern) ) { string ++; length ++; }
            break;

        case '\\':
            /* Quote the next character, unless there is none. */
            if ( pattern[1] != '\0' ) pattern ++;
            /* fall through */

        default :
            /* The guard keeps an empty pattern from "matching" the
               terminator of string and running past it. */
            if ( *pattern != '\0' )
                while ( *string == *pattern ) { string ++; length ++; }

            *pattern = '\0';
            break;
    }

    return length;
}
-
-
-
-
/***
 *  Function    :  recursexp
 *
 *  Description :  Returns the number of characters at the start of a
 *                 string matched by a regular expression.
 *
 *  Decisions   :  The following symbols are treated specially:
 *
 *                 .     any character
 *                 \     quote next character
 *                 *     match zero or more of the preceding element
 *                 +     match one or more of the preceding element
 *                 []    set of characters
 *
 *                 The first element is matched as many times as allowed,
 *                 then the rest of the expression is tried recursively on
 *                 what follows; on failure one repetition is given back
 *                 and the rest is tried again.  An element followed by
 *                 '+', or by no quantifier, is never reduced below one
 *                 occurrence.
 *
 *  Parameters  :  in   char *string    input string to be matched
 *                 in   char *pattern   regular expression to match
 *                                      (not modified: the function works
 *                                      on a private copy)
 *
 *  Return      :  number of characters matched by regular expression
 *                 (0 for an empty pattern)
 *                 -1 if not matched, or if the copy cannot be allocated
 *
 *  OS/Compiler :  All
 ***/

int recursexp( const char *string, char *pattern )

{
    char *base;              /* private copy; the only pointer given to free() */
    char *rest;              /* what follows the first element inside the copy */
    char *end;               /* terminating NUL of the copy                    */
    int count1, count2 = -1;
    int minone = 1;          /* least number of occurrences of first element   */

    if ( ! *pattern ) return 0;

    base = strdup( pattern );
    if ( base == NULL ) return -1;
    end = base + strlen( base );

    /* match() destroys its pattern: it writes a NUL over the last
       character of the first element. */
    count1 = match( string, base );

    /* The rest of the expression starts just after that NUL.  If no NUL
       was inserted before the end of the copy, there is no rest: stay on
       the terminator instead of stepping outside the buffer. */
    rest = base + strlen( base );
    if ( rest < end ) rest ++;

    switch ( *rest )
    {
        case '\0':           /* last element, no quantifier */
            free( base );
            return count1 ? 1 : -1;

        case '*':
            rest ++;
            minone = 0;
            break;

        case '+':
            rest ++;
            break;

        default :            /* no quantifier: exactly one occurrence */
            if ( count1 > 1 ) count1 = 1;
            break;
    }

    /* Try the longest repetition first, then give characters back.  The
       loop does not run at all when fewer than minone were matched. */
    for ( ; count1 >= minone; count1 -- )
    {
        count2 = recursexp( string + count1, rest );
        if ( count2 >= 0 ) break;
    }

    free( base );

    if ( count2 < 0 ) return -1;

    return (count1 + count2);
}
-
-
-
-
/***
 *  Function    :  regexp
 *
 *  Description :  Searches a string for the first substring matched by a
 *                 regular expression and returns that substring.
 *
 *  Decisions   :  The following symbols are treated specially:
 *
 *                 ^     start of line (first character of the pattern)
 *                 $     end of line (last character of the pattern,
 *                       unless preceded by an odd number of '\')
 *                 .     any character
 *                 \     quote next character
 *                 *     match zero or more
 *                 +     match one or more
 *                 []    set of characters
 *
 *                 ex:   [aeiou0-9]   match a, e, i, o, u, and 0 thru 9
 *                       [^aeiou0-9]  match anything but a, e, i, o, u,
 *                                    and 0 thru 9
 *
 *                 With '$', every start position is tried until a match
 *                 reaching the end of the string is found.  With '^',
 *                 only the start of the string is tried.  An empty string
 *                 is never searched, so nothing matches in it.
 *
 *  Parameters  :  out  char *outstr    resulting string (may be NULL)
 *                 in   char *string    input string in which we search
 *                 in   char *pattern   regular expression to match
 *
 *  Return      :  - if ( outstr != NULL ): outstr, filled by
 *                   strleft( outstr, position, length ) with the matched
 *                   text; length is 0 when nothing matched.
 *                   NOTE(review): strleft comes from the project header
 *                   and is assumed to copy the first 'length' characters
 *                   and terminate the result -- confirm.
 *                 - if ( outstr == NULL ): pointer inside 'string' to the
 *                   start of the match.  When nothing matched this is the
 *                   terminating NUL of 'string', or, for a '^' pattern,
 *                   'string' itself.
 *                 - if the working copy of the pattern cannot be
 *                   allocated: NULL when outstr is NULL, else outstr set
 *                   to the empty string.
 *
 *  OS/Compiler :  All
 ***/

char *regexp( char *outstr, const char *string, const char *pattern )

{
    char *base;              /* private copy; the only pointer given to free() */
    char *pat;               /* copy without the '^' / '$' anchors             */
    char *ptr;
    int count = 0, found = 0, begin = 0, end = 0;
    size_t remaining;        /* characters left from the current position      */

    base = strdup( pattern );
    if ( base == NULL )
    {
        if ( outstr == NULL ) return NULL;
        *outstr = '\0';
        return outstr;
    }
    pat = base;

    if ( *pat != '\0' )      /* an empty pattern has no last character */
    {
        ptr = strend( pat ) - 1;
        if ( *ptr == '$' )                       /* Match end of line */
        {
            int quotes = 0;
            char *q;

            /* An odd number of backslashes just before means the '$'
               itself is quoted. */
            for ( q = ptr; (q > pat) && (*(q - 1) == '\\'); q -- ) quotes ++;

            if ( (quotes % 2) == 0 )
            {
                end = 1;
                *ptr = '\0';
            }
        }
    }

    if ( *pat == '^' )                           /* Match begin of line */
    {
        begin = 1;
        pat ++;
    }

    remaining = strlen( string );
    for ( ; *string; string ++, remaining -- )
    {
        count = recursexp( string, pat );
        if ( (count >= 0) && (! end || ((size_t) count == remaining)) )
        {
            found = 1;
            break;
        }
        if ( begin ) break;                      /* only one position allowed */
    }

    free( base );

    if ( ! found ) count = 0;                    /* never pass a negative length */

    if ( outstr )
    {
        strleft( outstr, string, count );
        return outstr;
    }
    return (char *) string;
}
-
-
#ifdef TEST

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Test driver, compiled only when TEST is defined.
 *
 * Prints the result of one fixed search, then repeatedly reads a string
 * and a pattern from standard input and prints what regexp() extracts.
 * The loop ends at end of input and the program returns 0.
 *
 * The original called exit(1) right after the fixed search, which made
 * the interactive loop unreachable; that call has been removed.
 */

int main( void )

{
    char string[255], pattern[255], result[255];

    regexp( result, "I123", "[^A-Z\\-^][0-9]+\\.*" );
    printf( "\n Result : %s\n\n", result );

    for (;;)
    {
        printf( "\n String : " );
        fflush( stdout );
        if ( fgets( string, sizeof string, stdin ) == NULL ) break;
        string[strcspn( string, "\n" )] = '\0';    /* fgets keeps the newline */

        printf( " Pattern: " );
        fflush( stdout );
        if ( fgets( pattern, sizeof pattern, stdin ) == NULL ) break;
        pattern[strcspn( pattern, "\n" )] = '\0';

        regexp( result, string, pattern );
        printf( "\n Result : %s\n\n", result );
    }

    return 0;
}

#endif
-