home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
OS/2 Shareware BBS: 10 Tools
/
10-Tools.zip
/
perl501m.zip
/
lib
/
File
/
KGlob2RE.pm
< prev
next >
Wrap
Text File
|
1995-03-28
|
8KB
|
237 lines
#!/usr/bin/perl
# KGlob2RE.pm # Based on glob2re.pl 1.5 1992/12/09 23:31:01 Tye McQueen
# @(#)KGlob2RE.pm 1.2, 95/03/27 22:00:11
# Convert Unix file "glob" patterns to Perl regular expressions.
require 5.000;
package File::KGlob2RE;
require Exporter;
@ISA = (Exporter);
@EXPORT = qw( &kglob2re );
# The supported features are (where x is a single character and X and Y are
# strings):
#
# . matches .
# \x matches x
# [...] matches a single character falling into the described character class
# ? matches any single character except /
# * matches zero or more characters not including /
# % matches zero or more directories (see technical notes at bottom)
# {X,Y} matches either pattern X or Y (can list any number of patterns)
#
#Examples:
# %s.* matches any file whose name starts with "s." in any directory
# %*.o matches any file whose name ends with ".o" in any directory
# %old/* matches any file in a directory called "old"
# %new%* matches any file in a directory called "new" or a subdirectory
# of a directory called "new"
# /%* matches any absolute path name
# {*,?*%*} matches any relative path name (this would be very inefficient
# except that it is specifically optimized)
# %X X%* these are also specifically optimized
sub kglob2re {
local( $glob )= @_;
local( $re )= "^";
local( $quote, $bracket, $brace, $slash );
# Optimize special cases:
return( "^[^/]?" )
if $glob =~ m-\{\?\*/?%/?\*,\*\}- # {?*/%/*,*} -> ^[^/]
|| $glob =~ m-\{\*,\?\*/?%/?\*\}-; # {*,?*/%/*} -> ^[^/]
for( split( //, $glob ) ) { # Go through glob pattern 1 char at a time:
$slash-- if $slash; # Was a / appended to $re last time?
if( $quote ) { # Was the last character \ ?
$re .= $_; # Don't interpret this character
$quote= 0; # Don't quote next char too
} elsif( '\\' eq $_ ) { # Is this character a \ ?
$re .= $_;
$quote= 1; # Quote next character
} elsif( $bracket ) { # Are we still inside a [...] ?
if( 1 == $bracket && "^" eq $_ ) {
$bracket= 2;
} else {
if( 2 == $bracket ) {
if( "-" eq $_ ) {
$re .= "-"; # [^-...] -> [^-/...]
$_= "/"; # (avoid [^-z] -> [^/-z])
} else {
$re .= "/"; # [^...] -> [^/...]
}
}
$bracket= 3;
}
$re .= $_;
$bracket= 0 if "]" eq $_; # Unquoted ] ends a [...]
} elsif( "[" eq $_ ) { # Start a [...]:
$re .= $_;
$bracket= 1;
} elsif( "?" eq $_ ) {
$re .= ".";
} elsif( "{" eq $_ ) # } # Start an {X,Y}:
{ # ^ so % works in vi
$re .= "("; # {X,Y} -> (X|Y)
$brace++; # Remember how many started
} elsif( "," eq $_ ) { # Inside a {X,Y}, comma -> | ...
$re .= $brace ? "|" : "\\,"; # else comma -> \, (to be safe)
} # {
elsif( "}" eq $_ ) {
if( $brace ) { # Completed an {X,Y}
$re .= ")";
$brace--;
} else { # { <- so % works in vi
$re .= "\\}"; # { {
warn "Unquoted, unmatched `}' will be treated as `\\}'\n";
}
} elsif( /\s/ ) { # Quote white space to avoid warning
warn qq-Unquoted white space in file glob pattern: "$glob"\n-;
$re .= $_; # else I assume it is an accident
} elsif( "*" eq $_ ) { # * won't match /
$re .= "[^/]*"; # * -> [^/]*
} elsif( "/" eq $_ ) {
$re .= $_ unless $slash; # // -> / and %/ -> %
$slash= 2; # So we know next time
} elsif( "%" eq $_ ) {
if( $slash ) { # Check this because....
$re .= "(|.*/)"; # (don't include another leading /)
} elsif( "^" eq $re ) { # .../%X is different than %X
$re= "(^|/)"; # %[/]X -> ^(|.*/)X$ -> (^|/)X$
} else {
$re .= "/(|.*/)"; # X[/]%[/]Y -> ^X/(|.*/)Y$
}
$slash= 2; # Don't include an extra tailing slash
} elsif( /\w/ ) { # Any letter, number, or _ :
$re .= $_; # stays the same
} else { # Any other symbol, quote it:
$re .= "\\" . $_; # Includes ' so m'...' works.
}
}
if( $quote || $bracket || $brace ) {
warn "Unexpected end of file glob pattern: $glob\n";
return undef;
}
if( $re !~ s-$NOQT/\(\|,\.\*/\)$-\1/- ) { # X/%* -> ^X/
$re .= '$';
} elsif( "" eq $re ) { # Since m// means something else:
$re= "^"; # %/* -> anything
}
$re;
}
package main;
require File::Basename; import File::Basename qw(basename);
if( &basename( $0 ) eq &basename( __FILE__ ) ) {
# Use `find ... -print | KGlob2RE.pm "pattern" [...]' to use as pipe or test
eval <<'EXAMPLE';
import File::KGlob2RE qw(&kglob2re);
sub quote { local($*)= 1; $_[1] =~ s/^$_[0]//g; $_[1]; }
if( 0 == @ARGV ) {
die "e( "\t*:\t", <<" ;" ), "\n";
: Usage: KGlob2RE.pm [-e] { "pattern" | -f file } [...]
: Examples:
: find . -print | KGlob2RE.pm "%*.c" | xargs grep -i "boogers"
: \\ls | KGlob2RE.pm "*.dat *.idx" | xargs chmod ug=rw,o=r
: Note that if only one argument is given and it contains one or
: more spaces, then it is split into several patterns because
: just using one set of quotes (") for the whole list is usually
: much easier. This splitting is *not* done if two or more
: arguments are given. "-f file" reads patterns, one per line,
: from the specified file (trailing spaces, #-comments, and
: blank lines in the file are ignored). Patterns begining with
: "!" exclude matching files. "-e" causes exceptions (files
: neither explicitly matched nor excluded) to generate a message
: on STDERR noting this.
;
}
if( "-e" eq $ARGV[0] ) {
$Warn= 1;
shift( @ARGV );
}
if( 1 == @ARGV && index($ARGV[0],' ') ) {
@ARGV= split( ' ', $ARGV[0] );
}
if( @ARGV < 2 ) { # Simpler example using single pattern:
$re= &kglob2re( $ARGV[0] );
while( <STDIN> ) { # For each file name read from stdin:
chop; # Take off the trailing newline
$_ .= "/" if -d $_ && ! m-/$-; # Put / on end of dir names
if( m/$re/o ) {
print "$_\n"; # Only print names matching pattern
} elsif( $Warn ) {
warn "File $_ unmatches by any pattern.\n";
}
}
} else {
while( @ARGV ) {
$_= shift( @ARGV );
if( /^-f/ ) {
if( "" eq ( $_= substr($_,2) ) ) {
@ARGV || die "Required file name missing after -f.\n";
$_= shift( @ARGV );
}
open( PAT, "<$_" )
|| die "Can't read patterns from $_: $!\n";
push( @pats, grep( (chop,s=\s*#.*==,length), <PAT> ) );
close( PAT );
} else {
push( @pats, $_ );
}
}
for( @pats ) {
if( /^!/ ) {
$if .= "\treturn 0 if m'" . &kglob2re(substr($_,1)) . "'o;\n";
} else {
$if .= "\treturn 1 if m'" . &kglob2re($_) . "'o;\n";
}
}
eval "sub matches {\n$if\t-1; }";
while( <STDIN> ) {
chop;
$_ .= "/" if -d $_ && ! m-/$-;
$re= &matches;
if( 1 == $re ) {
print "$_\n";
} elsif( $Warn && -1 == $re ) {
warn "File $_ unmatched by any pattern.\n";
}
}
}
EXAMPLE
chop $@; die $@ if $@;
}
#Technical notes:
#
# Items were listed in order of precedence. For example: \[ matches [; ?, *,
# %, and { have no special meaning within [...]; \x within [...] matches x so
# [\][] matches [ or ]; all, including {X,Y}, can be used within {X,Y}.
#
# % will match / or /.../. If % is the first character of a pattern, it will
# also match the empty string. For sanity, /%/, /%, and %/ are equivalent to %
# except that this will not cause % to be considered as the first character in
# a pattern. So "/%/X" and "/%X" will match "/X" but not "X" (which is good).
#
# Note that {} and % interfere in the following ways: A % inside {} is never
# considered as being the first character of a pattern, even when it probably
# should be; If /'s or %'s (but not both) are nested in {} they will not be
# treated as adjacent and so the (possibly redundant) / will not be removed,
# even when it probably should be removed.
#
# Hint: If you have "/" appended to all directory file names, patterns ending
# in "/" will only match directory names. A % at the end of a pattern will
# never match unless you do this.
#
# You can use
# m/$re/[o]
# or
# eval "m'" . $re . "'"
# But other choices may not work. For example,
# eval "m/" . $re . "/"
# won't because I don't bother to quote /'s. And
# eval 'm"' . $re . '"'
# risks interpretation of $ in unexpected ways (I think).
1;