home *** CD-ROM | disk | FTP | other *** search
- package File::GlobMapper;
-
- use strict;
- use warnings;
- use Carp;
-
- our ($CSH_GLOB);
-
- BEGIN
- {
- if ($] < 5.006)
- {
- require File::BSDGlob; import File::BSDGlob qw(:glob) ;
- $CSH_GLOB = File::BSDGlob::GLOB_CSH() ;
- *globber = \&File::BSDGlob::csh_glob;
- }
- else
- {
- require File::Glob; import File::Glob qw(:glob) ;
- $CSH_GLOB = File::Glob::GLOB_CSH() ;
- #*globber = \&File::Glob::bsd_glob;
- *globber = \&File::Glob::csh_glob;
- }
- }
-
- our ($Error);
-
- our ($VERSION, @EXPORT_OK);
- $VERSION = '1.000';
- @EXPORT_OK = qw( globmap );
-
-
- our ($noPreBS, $metachars, $matchMetaRE, %mapping, %wildCount);
- $noPreBS = '(?<!\\\)' ; # no preceeding backslash
- $metachars = '.*?[](){}';
- $matchMetaRE = '[' . quotemeta($metachars) . ']';
-
- %mapping = (
- '*' => '([^/]*)',
- '?' => '([^/])',
- '.' => '\.',
- '[' => '([',
- '(' => '(',
- ')' => ')',
- );
-
- %wildCount = map { $_ => 1 } qw/ * ? . { ( [ /;
-
- sub globmap ($$;)
- {
- my $inputGlob = shift ;
- my $outputGlob = shift ;
-
- my $obj = new File::GlobMapper($inputGlob, $outputGlob, @_)
- or croak "globmap: $Error" ;
- return $obj->getFileMap();
- }
-
- sub new
- {
- my $class = shift ;
- my $inputGlob = shift ;
- my $outputGlob = shift ;
- # TODO -- flags needs to default to whatever File::Glob does
- my $flags = shift || $CSH_GLOB ;
- #my $flags = shift ;
-
- $inputGlob =~ s/^\s*\<\s*//;
- $inputGlob =~ s/\s*\>\s*$//;
-
- $outputGlob =~ s/^\s*\<\s*//;
- $outputGlob =~ s/\s*\>\s*$//;
-
- my %object =
- ( InputGlob => $inputGlob,
- OutputGlob => $outputGlob,
- GlobFlags => $flags,
- Braces => 0,
- WildCount => 0,
- Pairs => [],
- Sigil => '#',
- );
-
- my $self = bless \%object, ref($class) || $class ;
-
- $self->_parseInputGlob()
- or return undef ;
-
- $self->_parseOutputGlob()
- or return undef ;
-
- my @inputFiles = globber($self->{InputGlob}, $flags) ;
-
- if (GLOB_ERROR)
- {
- $Error = $!;
- return undef ;
- }
-
- #if (whatever)
- {
- my $missing = grep { ! -e $_ } @inputFiles ;
-
- if ($missing)
- {
- $Error = "$missing input files do not exist";
- return undef ;
- }
- }
-
- $self->{InputFiles} = \@inputFiles ;
-
- $self->_getFiles()
- or return undef ;
-
- return $self;
- }
-
- sub _retError
- {
- my $string = shift ;
- $Error = "$string in input fileglob" ;
- return undef ;
- }
-
- sub _unmatched
- {
- my $delimeter = shift ;
-
- _retError("Unmatched $delimeter");
- return undef ;
- }
-
- sub _parseBit
- {
- my $self = shift ;
-
- my $string = shift ;
-
- my $out = '';
- my $depth = 0 ;
-
- while ($string =~ s/(.*?)$noPreBS(,|$matchMetaRE)//)
- {
- $out .= quotemeta($1) ;
- $out .= $mapping{$2} if defined $mapping{$2};
-
- ++ $self->{WildCount} if $wildCount{$2} ;
-
- if ($2 eq ',')
- {
- return _unmatched "("
- if $depth ;
-
- $out .= '|';
- }
- elsif ($2 eq '(')
- {
- ++ $depth ;
- }
- elsif ($2 eq ')')
- {
- return _unmatched ")"
- if ! $depth ;
-
- -- $depth ;
- }
- elsif ($2 eq '[')
- {
- # TODO -- quotemeta & check no '/'
- # TODO -- check for \] & other \ within the []
- $string =~ s#(.*?\])##
- or return _unmatched "[" ;
- $out .= "$1)" ;
- }
- elsif ($2 eq ']')
- {
- return _unmatched "]" ;
- }
- elsif ($2 eq '{' || $2 eq '}')
- {
- return _retError "Nested {} not allowed" ;
- }
- }
-
- $out .= quotemeta $string;
-
- return _unmatched "("
- if $depth ;
-
- return $out ;
- }
-
- sub _parseInputGlob
- {
- my $self = shift ;
-
- my $string = $self->{InputGlob} ;
- my $inGlob = '';
-
- # Multiple concatenated *'s don't make sense
- #$string =~ s#\*\*+#*# ;
-
- # TODO -- Allow space to delimit patterns?
- #my @strings = split /\s+/, $string ;
- #for my $str (@strings)
- my $out = '';
- my $depth = 0 ;
-
- while ($string =~ s/(.*?)$noPreBS($matchMetaRE)//)
- {
- $out .= quotemeta($1) ;
- $out .= $mapping{$2} if defined $mapping{$2};
- ++ $self->{WildCount} if $wildCount{$2} ;
-
- if ($2 eq '(')
- {
- ++ $depth ;
- }
- elsif ($2 eq ')')
- {
- return _unmatched ")"
- if ! $depth ;
-
- -- $depth ;
- }
- elsif ($2 eq '[')
- {
- # TODO -- quotemeta & check no '/' or '(' or ')'
- # TODO -- check for \] & other \ within the []
- $string =~ s#(.*?\])##
- or return _unmatched "[";
- $out .= "$1)" ;
- }
- elsif ($2 eq ']')
- {
- return _unmatched "]" ;
- }
- elsif ($2 eq '}')
- {
- return _unmatched "}" ;
- }
- elsif ($2 eq '{')
- {
- # TODO -- check no '/' within the {}
- # TODO -- check for \} & other \ within the {}
-
- my $tmp ;
- unless ( $string =~ s/(.*?)$noPreBS\}//)
- {
- return _unmatched "{";
- }
- #$string =~ s#(.*?)\}##;
-
- #my $alt = join '|',
- # map { quotemeta $_ }
- # split "$noPreBS,", $1 ;
- my $alt = $self->_parseBit($1);
- defined $alt or return 0 ;
- $out .= "($alt)" ;
-
- ++ $self->{Braces} ;
- }
- }
-
- return _unmatched "("
- if $depth ;
-
- $out .= quotemeta $string ;
-
-
- $self->{InputGlob} =~ s/$noPreBS[\(\)]//g;
- $self->{InputPattern} = $out ;
-
- #print "# INPUT '$self->{InputGlob}' => '$out'\n";
-
- return 1 ;
-
- }
-
- sub _parseOutputGlob
- {
- my $self = shift ;
-
- my $string = $self->{OutputGlob} ;
- my $maxwild = $self->{WildCount};
-
- if ($self->{GlobFlags} & GLOB_TILDE)
- #if (1)
- {
- $string =~ s{
- ^ ~ # find a leading tilde
- ( # save this in $1
- [^/] # a non-slash character
- * # repeated 0 or more times (0 means me)
- )
- }{
- $1
- ? (getpwnam($1))[7]
- : ( $ENV{HOME} || $ENV{LOGDIR} )
- }ex;
-
- }
-
- # max #1 must be == to max no of '*' in input
- while ( $string =~ m/#(\d)/g )
- {
- croak "Max wild is #$maxwild, you tried #$1"
- if $1 > $maxwild ;
- }
-
- my $noPreBS = '(?<!\\\)' ; # no preceeding backslash
- #warn "noPreBS = '$noPreBS'\n";
-
- #$string =~ s/${noPreBS}\$(\d)/\${$1}/g;
- $string =~ s/${noPreBS}#(\d)/\${$1}/g;
- $string =~ s#${noPreBS}\*#\${inFile}#g;
- $string = '"' . $string . '"';
-
- #print "OUTPUT '$self->{OutputGlob}' => '$string'\n";
- $self->{OutputPattern} = $string ;
-
- return 1 ;
- }
-
- sub _getFiles
- {
- my $self = shift ;
-
- my %outInMapping = ();
- my %inFiles = () ;
-
- foreach my $inFile (@{ $self->{InputFiles} })
- {
- next if $inFiles{$inFile} ++ ;
-
- my $outFile = $inFile ;
-
- if ( $inFile =~ m/$self->{InputPattern}/ )
- {
- no warnings 'uninitialized';
- eval "\$outFile = $self->{OutputPattern};" ;
-
- if (defined $outInMapping{$outFile})
- {
- $Error = "multiple input files map to one output file";
- return undef ;
- }
- $outInMapping{$outFile} = $inFile;
- push @{ $self->{Pairs} }, [$inFile, $outFile];
- }
- }
-
- return 1 ;
- }
-
- sub getFileMap
- {
- my $self = shift ;
-
- return $self->{Pairs} ;
- }
-
- sub getHash
- {
- my $self = shift ;
-
- return { map { $_->[0] => $_->[1] } @{ $self->{Pairs} } } ;
- }
-
- 1;
-
- __END__
-
- =head1 NAME
-
- File::GlobMapper - Extend File Glob to Allow Input and Output Files
-
- =head1 SYNOPSIS
-
- use File::GlobMapper qw( globmap );
-
- my $aref = globmap $input => $output
- or die $File::GlobMapper::Error ;
-
- my $gm = new File::GlobMapper $input => $output
- or die $File::GlobMapper::Error ;
-
-
- =head1 DESCRIPTION
-
- This module needs Perl5.005 or better.
-
- This module takes the existing C<File::Glob> module as a starting point and
- extends it to allow new filenames to be derived from the files matched by
- C<File::Glob>.
-
- This can be useful when carrying out batch operations on multiple files that
- have both an input filename and output filename and the output file can be
- derived from the input filename. Examples of operations where this can be
- useful include, file renaming, file copying and file compression.
-
-
- =head2 Behind The Scenes
-
- To help explain what C<File::GlobMapper> does, consider what code you
- would write if you wanted to rename all files in the current directory
- that ended in C<.tar.gz> to C<.tgz>. So say these files are in the
- current directory
-
- alpha.tar.gz
- beta.tar.gz
- gamma.tar.gz
-
- and they need renamed to this
-
- alpha.tgz
- beta.tgz
- gamma.tgz
-
- Below is a possible implementation of a script to carry out the rename
- (error cases have been omitted)
-
- foreach my $old ( glob "*.tar.gz" )
- {
- my $new = $old;
- $new =~ s#(.*)\.tar\.gz$#$1.tgz# ;
-
- rename $old => $new
- or die "Cannot rename '$old' to '$new': $!\n;
- }
-
- Notice that a file glob pattern C<*.tar.gz> was used to match the
- C<.tar.gz> files, then a fairly similar regular expression was used in
- the substitute to allow the new filename to be created.
-
- Given that the file glob is just a cut-down regular expression and that it
- has already done a lot of the hard work in pattern matching the filenames,
- wouldn't it be handy to be able to use the patterns in the fileglob to
- drive the new filename?
-
- Well, that's I<exactly> what C<File::GlobMapper> does.
-
- Here is same snippet of code rewritten using C<globmap>
-
- for my $pair (globmap '<*.tar.gz>' => '<#1.tgz>' )
- {
- my ($from, $to) = @$pair;
- rename $from => $to
- or die "Cannot rename '$old' to '$new': $!\n;
- }
-
- So how does it work?
-
- Behind the scenes the C<globmap> function does a combination of a
- file glob to match existing filenames followed by a substitute
- to create the new filenames.
-
- Notice how both parameters to C<globmap> are strings that are delimited by <>.
- This is done to make them look more like file globs - it is just syntactic
- sugar, but it can be handy when you want the strings to be visually
- distinctive. The enclosing <> are optional, so you don't have to use them - in
- fact the first thing globmap will do is remove these delimiters if they are
- present.
-
- The first parameter to C<globmap>, C<*.tar.gz>, is an I<Input File Glob>.
- Once the enclosing "< ... >" is removed, this is passed (more or
- less) unchanged to C<File::Glob> to carry out a file match.
-
- Next the fileglob C<*.tar.gz> is transformed behind the scenes into a
- full Perl regular expression, with the additional step of wrapping each
- transformed wildcard metacharacter sequence in parenthesis.
-
- In this case the input fileglob C<*.tar.gz> will be transformed into
- this Perl regular expression
-
- ([^/]*)\.tar\.gz
-
- Wrapping with parenthesis allows the wildcard parts of the Input File
- Glob to be referenced by the second parameter to C<globmap>, C<#1.tgz>,
- the I<Output File Glob>. This parameter operates just like the replacement
- part of a substitute command. The difference is that the C<#1> syntax
- is used to reference sub-patterns matched in the input fileglob, rather
- than the C<$1> syntax that is used with perl regular expressions. In
- this case C<#1> is used to refer to the text matched by the C<*> in the
- Input File Glob. This makes it easier to use this module where the
- parameters to C<globmap> are typed at the command line.
-
- The final step involves passing each filename matched by the C<*.tar.gz>
- file glob through the derived Perl regular expression in turn and
- expanding the output fileglob using it.
-
- The end result of all this is a list of pairs of filenames. By default
- that is what is returned by C<globmap>. In this example the data structure
- returned will look like this
-
- ( ['alpha.tar.gz' => 'alpha.tgz'],
- ['beta.tar.gz' => 'beta.tgz' ],
- ['gamma.tar.gz' => 'gamma.tgz']
- )
-
-
- Each pair is an array reference with two elements - namely the I<from>
- filename, that C<File::Glob> has matched, and a I<to> filename that is
- derived from the I<from> filename.
-
-
-
- =head2 Limitations
-
- C<File::GlobMapper> has been kept simple deliberately, so it isn't intended to
- solve all filename mapping operations. Under the hood C<File::Glob> (or for
- older versions of Perl, C<File::BSDGlob>) is used to match the files, so you
- will never have the flexibility of full Perl regular expression.
-
- =head2 Input File Glob
-
- The syntax for an Input FileGlob is identical to C<File::Glob>, except
- for the following
-
- =over 5
-
- =item 1.
-
- No nested {}
-
- =item 2.
-
- Whitespace does not delimit fileglobs.
-
- =item 3.
-
- The use of parenthesis can be used to capture parts of the input filename.
-
- =item 4.
-
- If an Input glob matches the same file more than once, only the first
- will be used.
-
- =back
-
- The syntax
-
- =over 5
-
- =item B<~>
-
- =item B<~user>
-
-
- =item B<.>
-
- Matches a literal '.'.
- Equivalent to the Perl regular expression
-
- \.
-
- =item B<*>
-
- Matches zero or more characters, except '/'. Equivalent to the Perl
- regular expression
-
- [^/]*
-
- =item B<?>
-
- Matches zero or one character, except '/'. Equivalent to the Perl
- regular expression
-
- [^/]?
-
- =item B<\>
-
- Backslash is used, as usual, to escape the next character.
-
- =item B<[]>
-
- Character class.
-
- =item B<{,}>
-
- Alternation
-
- =item B<()>
-
- Capturing parenthesis that work just like perl
-
- =back
-
- Any other character it taken literally.
-
- =head2 Output File Glob
-
- The Output File Glob is a normal string, with 2 glob-like features.
-
- The first is the '*' metacharacter. This will be replaced by the complete
- filename matched by the input file glob. So
-
- *.c *.Z
-
- The second is
-
- Output FileGlobs take the
-
- =over 5
-
- =item "*"
-
- The "*" character will be replaced with the complete input filename.
-
- =item #1
-
- Patterns of the form /#\d/ will be replaced with the
-
- =back
-
- =head2 Returned Data
-
-
- =head1 EXAMPLES
-
- =head2 A Rename script
-
- Below is a simple "rename" script that uses C<globmap> to determine the
- source and destination filenames.
-
- use File::GlobMapper qw(globmap) ;
- use File::Copy;
-
- die "rename: Usage rename 'from' 'to'\n"
- unless @ARGV == 2 ;
-
- my $fromGlob = shift @ARGV;
- my $toGlob = shift @ARGV;
-
- my $pairs = globmap($fromGlob, $toGlob)
- or die $File::GlobMapper::Error;
-
- for my $pair (@$pairs)
- {
- my ($from, $to) = @$pair;
- move $from => $to ;
- }
-
-
-
- Here is an example that renames all c files to cpp.
-
- $ rename '*.c' '#1.cpp'
-
- =head2 A few example globmaps
-
- Below are a few examples of globmaps
-
- To copy all your .c file to a backup directory
-
- '</my/home/*.c>' '</my/backup/#1.c>'
-
- If you want to compress all
-
- '</my/home/*.[ch]>' '<*.gz>'
-
- To uncompress
-
- '</my/home/*.[ch].gz>' '</my/home/#1.#2>'
-
- =head1 SEE ALSO
-
- L<File::Glob|File::Glob>
-
- =head1 AUTHOR
-
- The I<File::GlobMapper> module was written by Paul Marquess, F<pmqs@cpan.org>.
-
- =head1 COPYRIGHT AND LICENSE
-
- Copyright (c) 2005 Paul Marquess. All rights reserved.
- This program is free software; you can redistribute it and/or
- modify it under the same terms as Perl itself.
-