home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
OS/2 Shareware BBS: 10 Tools
/
10-Tools.zip
/
perl501m.zip
/
lib
/
File
/
KGlob.pm
< prev
next >
Wrap
Text File
|
1995-03-28
|
11KB
|
292 lines
#!/usr/bin/perl
# KGlob.pm # Based on: glob.pl 1.1 1992/12/08 17:55:21 Tye McQueen
# @(#)KGlob.pm 1.2, 95/03/27 21:57:41
# Expand a Unix file glob (wildcard) into a list of matching file names.
require 5.001;
package File::KGlob;
require Exporter;
@ISA = (Exporter);
@EXPORT_OK = qw( &glob &kglob &pglob &fglob &unbrac );
require File::KGlob2RE;
# &glob( "pat" [, ...] ) - Expands Unix file glob(s) into the list of
# matching Unix files. The following contructs are supported:
# \x matches x
# [abc] matches the single character "a", "b", or "c"
# [a-c] same as above
# [^a-c] matches any single character but "a", "b", "c", "/", or "\0"
# ? matches any single character except "/" and "\0"
# * matches zero or more characters not including "/" and "\0"
# {X,Y,Z} matches one of the patterns X, Y, or Z
%GlobContext= ();
sub glob {
my( $pkg, $file, $line )= caller;
if( ! $GlobContext{$file,$line} ) {
my( @list )= &kglob( @_ );
@list= sort( @list ) if $Sort;
$GlobContext{$file,$line}= \@list;
}
if( wantarray ) {
my( @return )= @{$GlobContext{$file,$line}};
delete $GlobContext{$file,$line};
@return;
} else {
my( $return )= shift( @{$GlobContext{$file,$line}} );
delete $GlobContext{$file,$line} if ! defined($return);
$return;
}
}
# &kglob() always returns an array of matches; the complexity of the
# algorythm would require a great deal of saved context to allow each
# match to be returned separately like is possible with &fglob().
# The array of values is not necessarilly sorted (that is easy enough
# to do if you want it so we won't waste the time to do it in case you
# don't want to).
# kglob may suprise you in the following ways:
# - {a,b} expands to ("a","b") even if files "a" and/or "b" do not exist
# - [^a-z] is supported (any character except a through z, /, and \0)
# - a leading dot (.) in any component of the path must be matched
# explicitly (with a dot, not with [^a-z], nor [.x], etc.)
# - {.,x}* matches .* (as well as x*)
# - setting $File::KGlob::Safe to a true value prevents "." and ".."
# in any component of the path from matching except exactly (by the
# pattern "." or "..")
# - \x is supported (expands to just "x")
# - % is not support (just matches "%") but File::KGlob2RE supports it
# - ~user and ~/ are supported as is ~{user1,user2} etc.
sub kglob {
my( @alts, @return, $user, $home );
foreach( @_ ) {
# If unquoted "{" in string, generate all possible combinations: #}
@alts= m#(^|[^\\])(\\\\)*\{# ? &unbrac( $_ ) : ( $_ ); #}
foreach( @alts ) {
if( m#^~([^/]+)# ) { # Expand ~user to user's home directory:
$user= $1 || getlogin(); # ~/ means "my" home directory
$home= $1 ? ( (getpwnam($1))[7] || "~$user" )
: ( (getpwuid($<))[7] || $ENV{'HOME'} || "/" );
s##$home#;
# Replace "~user" with user's home directory (unless no such
# user, then leave as is), unless is "~/" and getlogin()
# failed, then try by current UID then $HOME then "/".
}
if( m#(^|[^\\])(\\\\)*[\[\?\*]# ) { # Some kind of wildcard:
push( @return, &pglob($_) ); # Find matching files.
} else { # Just a string, perhaps with \-quoting:
s/\\(.)/\1/g; # Remove the \'s used for quoting.
push( @return, $_ );
}
}
}
@return;
}
# &unbrac( $str ) - Expands a string containing "{a,b}" constructs. Returns
# an array of strings. "\" may be used to quote "{", ",", or "}" to suppress
# its special meaning (the "\"s are left in the returned strings).
# This is a more efficient method than &glob() to expand these contructs
# where no file wildcards are involved.
sub unbrac {
local( $glob )= @_;
local( $pos, $bef, @bef, $temp, $mid, @mid, $aft, @aft, @return );
$pos= rindex($glob,"{"); # Find the last "{" #}}
while( 0 <= $pos ) { # Until there are no more "{"s to find: #}
$bef= substr( $glob, 0, $pos ); # Part before "{" #}
$temp= substr( $glob, 1 + $pos ); # Part after "{" #}
if( $bef =~ m#(^|[^\\])(\\\\)*$# ) { # The "{" is unquoted: #}{
$pos= index( $temp, "}" ); #{ Find the next nearest "}"
while( 0 <= $pos ) { #{ Until we run out of "}"s:
$mid= substr( $temp, 0, $pos ); # Part between "{" and "}" #{
$aft= substr( $temp, 1 + $pos ); # Part after "}"
if( $mid =~ m#(^|[^\\])(\\\\)*$# ) { #{ The "}" is unquoted:
$mid =~ s/((^|[^\\])(\\\\))*,/\1\0/g; # Most unquoted ","s
$mid =~ s/((^|[^\\])(\\\\))*,/\1\0/g; # Remaining ones
return &mcat( $bef, $aft, split(/\0/,$mid) ); # Done!
} # &mcat builds all of the resulting strings.
} # &mcat also "unbrac"s $bef and $aft.
if( $Debug ) {
die "Unclosed `{' in pattern string: `", #}
$bef, "' . `{' . `", $aft, "'\n"; #}
}
}
$pos= rindex( $glob, "{", $pos - 1 ); #}
}
( $glob ); # No unquoted "{"s to be expanded #}
}
# &File::KGlob::mcat( $bef, $aft, @mids ) - Used by &unbrac to make the code
# easier to follow. Builds all of the strings $bef . $mids[$i] . $aft and
# then calls &unbrac on each of them.
sub mcat {
local( $bef, $aft, @mid )= @_;
local( @bef, @aft, $one, $two, $three, @return );
foreach( @mid ) {
push( @return, &unbrac( $bef . $_ . $aft ) );
}
@return;
}
# &pglob( $glob ) - Expand a Unix file glob except for "{a,b}" constructs.
# The name is short for "Path GLOB".
sub pglob {
local( $glob )= @_;
local( @dirs )= split( m-/+-, $glob, 9999 ); # (so trailing / isn't ignored)
local( @paths, @build, $dir, $file, $path );
for( 0 .. @dirs-2 ) { # Tack "/" to all but last component so fglob
$dirs[$_] .= "/"; # only returns directories for middle parts
}
pop( @dirs ) if "" eq $dirs[@dirs-1]; # In case $glob had a trailing /
if( "/" eq $dirs[0] ) {
$path= "/";
shift( @dirs );
} else {
$path= "";
}
@paths= grep( $_= $path . $_, &fglob( shift(@dirs), $path ) );
foreach $dir ( @dirs ) {
@build= ();
foreach $path ( @paths ) {
foreach $file ( &fglob( $dir, $path ) ) {
push( @build, "$path/$file" );
}
}
return () unless @build;
@paths= @build
}
@paths;
}
# &fglob( $glob [, $dir] ) - Expands a file wildcard, $glob, (a glob with
# no /'s, ie. no directories) into the list of matching Unix files found
# in the directory $dir (or "." if $dir not specified).
# In an array context, simply returns the list of matching files (not
# necessarilly sorted). It returns the empty list if no matches or if
# $dir can't be read (if $dir can't be read, $! will have the reason).
#BUG: There is no way to tell between zero matches vs. an error!
# In a scalar context, returns a context string (or undef if can't
# access the directory) that is used in subsequent calls to get each
# matching file one at a time (again, not necessarilly in sorted order).
# For example:
# $context= &fglob( "*.dat" );
# die "Can't read current directory: $!\n" unless defined($context);
# while( $_= &fglob( $context ) ) {
# if( ! &do_something( $_ ) ) {
# &fglob( $context, 1 );
# last;
# }
# }
# Note that you may use `&fglob($context,1)' to close the directory if
# you don't want to get *all* of the matching files ("1" can be anything
# but undef). Also, calling fglob with a context string in an array
# context returns all of the *remaining* matching files.
# fglob may suprise you in the following ways:
# - / is not supported (nothing will match) except as last character
# of $glob, in which case only directory names are returned (without
# trailing /s)
# - {a,b} will only return ("a","b") if the files "a" and "b" exist
# - [^a-z] is supported (any character except a through z, /, and \0)
# - \x is supported (expands to just "x" -- called "quoting")
# - If no unquoted wildcards ("?", "[", "*", or "{") appear in $glob,
# just returns $glob minus the \-quoting and trailing / (if any)
# even if no such file exists.
# - Files whose name begin with "." are not matched unless the first
# char of $glob is "." (neither "[^a-z]*" nor "[.x]*" match ".xyz").
# - {.,x}* matches x* but not .* (a bug, but difficult to solve)
# - If $File::KGlob::Safe is set to a true value, . and .. are not
# matched by any pattern (except "." and ".." themselves).
$Safe= 0; # Whether to exclude "." and ".." from all matches.
$Sort= 1; # Whether &glob() sorts the returned list.
$NextHndl= "File::KGlob::DIR0001";
sub fglob { # Expland a file-only glob (no /'s in the pattern)
local( $glob, $dir )= @_;
local( $re, $hndl, $nodots, $onlydirs, $match, @matches );
if( "\0" eq substr($glob,0,1) ) { # A context from a previous call:
( $hndl, $glob, $nodots, $onlydirs, $re )=
split( substr($glob,1), "\0" );
if( defined($dir) ) { # &fglob($context,0) means:
closedir( $hndl ); # prematurely end the search
return wantarray ? () : undef;
}
$dir= $glog;
} elsif( $glob !~ m/[\[\?\*\{\\]/ ) { # Contains no special chars: #}
chop $glob if "/" eq substr($glob,-1,1);
return( $glob );
} else {
$hndl= $NextHndl;
$NextHndl++ unless wantarray;
$nodots= "." ne substr($glob,0,1); # Skip all .*'s unless explicit
chop $glob if $onlydirs= "/" eq substr($glob,-1,1);
# File::KGlob2RE uses "%" for "any subdir(s)" but we don't so...
$glob =~ s#((^|[^\\])(\\\\)*)\%#\1\\%#g; # quote any unquoted "%"s.
$re= &File::KGlob2RE::kglob2re( $glob ); # Change glob to regexp.
$dir= "." if "" eq $dir;
if( ! opendir( $hndl, $dir ) ) {
return wantarray ? () : undef;
}
if( ! wantarray ) {
return "\0$hndl\0$dir\0$nodots\0$onlydirs\0$re";
}
}
while( $_= readdir( $hndl ) ) {
if( m/$re/ ) {
if( $nodots && "." eq substr($_,0,1) # 1-Don't match .*
|| $Safe && ( "." eq $_ || ".." eq $_ ) # 2-Don't match . or ..
|| $onlydirs && ! -d "$dir/$_" ) { # 3-Only match dirs
next; # 1-except explicitly (.*), 3-when $glob ends with /
} # 2-except exactly (. or ..) (when $Safe set)
return $_ unless wantarray;
push( @matches, $_ );
}
}
closedir( $hndl );
wantarray ? @matches : undef;
}
package main;
require File::Basename; import File::Basename qw(basename);
if( &basename( $0 ) eq &basename( __FILE__ ) ) {
# `KGlob.pm "pattern" [...]' to list matching files, one per line.
eval <<'EXAMPLE';
import File::KGlob qw(&glob);
sub quote { local($*)= 1; $_[1] =~ s/^$_[0]//g; $_[1]; }
if( 0 == @ARGV ) {
die "e( "\t*:\t", <<" ;" ), "\n";
: Usage: KGlob.pm "pattern" [...]
: Examples:
: KGlob.pm "*.c" | xargs grep boogers
: KGlob.pm "*.dat *.idx" | xargs chmod ug=rw,o=r
: Note that if only one argument is given and it contains one or
: more spaces, then it is split into several patterns because
: just using one set of quotes (") for the whole list is usually
: much easier. This splitting is *not* done if two or more
: arguments are given.
;
}
if( 1 == @ARGV && index($ARGV[0],' ') ) {
@ARGV= split( ' ', $ARGV[0] );
}
foreach( @ARGV ) {
@matches{ &glob($_) }= ();
}
foreach( sort keys %matches ) {
print "$_\n";
}
EXAMPLE
chop $@; die $@ if $@;
}
1;