home *** CD-ROM | disk | FTP | other *** search
- # Mail::Address.pm
- #
- # Copyright (c) 1995-2001 Graham Barr <gbarr@pobox.com>. All rights reserved.
- # Copyright (c) 2002-2003 Mark Overmeer <mailtools@overmeer.net>
- # This program is free software; you can redistribute it and/or
- # modify it under the same terms as Perl itself.
-
- package Mail::Address;
- use strict;
-
- use Carp;
- use vars qw($VERSION);
- use locale;
-
- $VERSION = "1.62";
- sub Version { $VERSION }
-
- #
- # given a comment, attempt to extract a person's name
- #
-
- sub _extract_name
- {
- # This function can be called as method as well
- my $self = @_ && ref $_[0] ? shift : undef;
-
- local $_ = shift or return '';
-
- # Bug in unicode \U, perl 5.8.0 breaks when casing utf8 in regex
- if($] eq 5.008)
- { require utf8;
- eval 'utf8::downgrade($_)';
- }
-
- # trim whitespace
- s/^\s+//;
- s/\s+$//;
- s/\s+/ /;
-
- # Disregard numeric names (e.g. 123456.1234@compuserve.com)
- return "" if /^[\d ]+$/;
-
- # remove outermost parenthesis
- s/^\((.*)\)$/$1/g;
-
- # remove outer quotation marks
- s/^"|"$//g;
-
- # remove embedded comments
- s/\(.*\)//g;
-
- # reverse "Last, First M." if applicable
- s/^([^\s]+) ?, ?(.*)$/$2 $1/;
- s/,.*//;
-
- # Change casing only when the name contains only upper or only
- # lower cased characters.
- unless( m/[A-Z]/ && m/[a-z]/ )
- { # Set the case of the name to first char upper rest lower
- # Upcase first letter on name
- s/\b(\w+)/\L\u$1/igo;
-
- # Scottish names such as 'McLeod'
- s/\bMc(\w)/Mc\u$1/igo;
-
- # Irish names such as 'O'Malley, O'Reilly'
- s/\bo'(\w)/O'\u$1/igo;
-
- # Roman numerals, eg 'Level III Support'
- s/\b(x*(ix)?v*(iv)?i*)\b/\U$1/igo;
- }
-
- # some cleanup
- s/\[[^\]]*\]//g;
- s/(^[\s'"]+|[\s'"]+$)//g;
- s/\s{2,}/ /g;
-
- return $_;
- }
-
- sub _tokenise {
- local($_) = join(',', @_);
- my(@words,$snippet,$field);
-
- s/\A\s+//;
- s/[\r\n]+/ /g;
-
- while ($_ ne '')
- {
- $field = '';
- if( s/^\s*\(/(/ ) # (...)
- {
- my $depth = 0;
-
- PAREN: while(s/^(\(([^\(\)\\]|\\.)*)//)
- {
- $field .= $1;
- $depth++;
- while(s/^(([^\(\)\\]|\\.)*\)\s*)//)
- {
- $field .= $1;
- last PAREN unless --$depth;
- $field .= $1 if s/^(([^\(\)\\]|\\.)+)//;
- }
- }
-
- carp "Unmatched () '$field' '$_'"
- if $depth;
-
- $field =~ s/\s+\Z//;
- push(@words, $field);
-
- next;
- }
-
- s/^("([^"\\]|\\.)*")\s*// # "..."
- || s/^(\[([^\]\\]|\\.)*\])\s*// # [...]
- || s/^([^\s\Q()<>\@,;:\\".[]\E]+)\s*//
- || s/^([\Q()<>\@,;:\\".[]\E])\s*//
- and do { push(@words, $1); next; };
-
- croak "Unrecognised line: $_";
- }
-
- push(@words, ",");
-
- \@words;
- }
-
- sub _find_next {
- my $idx = shift;
- my $tokens = shift;
- my $len = shift;
- while($idx < $len) {
- my $c = $tokens->[$idx];
- return $c if($c eq "," || $c eq "<");
- $idx++;
- }
- return "";
- }
-
- sub _complete {
- my $pkg = shift;
- my $phrase = shift;
- my $address = shift;
- my $comment = shift;
- my $o = undef;
-
- if(@{$phrase} || @{$comment} || @{$address}) {
- $o = $pkg->new(join(" ",@{$phrase}),
- join("", @{$address}),
- join(" ",@{$comment}));
- @{$phrase} = ();
- @{$address} = ();
- @{$comment} = ();
- }
-
- return $o;
- }
-
-
- sub new {
- my $pkg = shift;
- my $me = bless [@_], $pkg;
- return $me;
- }
-
-
- sub parse {
- my $pkg = shift;
- my @line = grep { defined $_} @_;
- my $line = join '', @line;
-
- local $_;
-
- my @phrase = ();
- my @comment = ();
- my @address = ();
- my @objs = ();
- my $depth = 0;
- my $idx = 0;
- my $tokens = _tokenise(@line);
- my $len = @$tokens;
- my $next = _find_next($idx,$tokens,$len);
-
- for( ; $idx < $len ; $idx++) {
- $_ = $tokens->[$idx];
-
- if(substr($_,0,1) eq "(") {
- push(@comment,$_);
- }
- elsif($_ eq '<') {
- $depth++;
- }
- elsif($_ eq '>') {
- $depth-- if $depth;
- }
- elsif($_ eq ',') {
- warn "Unmatched '<>' in $line" if($depth);
- my $o = _complete($pkg,\@phrase, \@address, \@comment);
- push(@objs, $o) if(defined $o);
- $depth = 0;
- $next = _find_next($idx+1,$tokens,$len);
- }
- elsif($depth) {
- push(@address,$_);
- }
- elsif($next eq "<") {
- push(@phrase,$_);
- }
- elsif( /\A[\Q.\@:;\E]\Z/ || !@address || $address[-1] =~ /\A[\Q.\@:;\E]\Z/) {
- push(@address,$_);
- }
- else {
- warn "Unmatched '<>' in $line" if($depth);
- my $o = _complete($pkg,\@phrase, \@address, \@comment);
- push(@objs, $o) if(defined $o);
- $depth = 0;
- push(@address,$_);
- }
- }
- @objs;
- }
-
- sub set_or_get {
- my $me = shift;
- my $i = shift;
- my $val = $me->[$i];
-
- $me->[$i] = shift if(@_);
-
- $val;
- }
-
-
- sub phrase { set_or_get(shift,0,@_) }
- sub address { set_or_get(shift,1,@_) }
- sub comment { set_or_get(shift,2,@_) }
-
-
- sub format {
- my @fmts = ();
- my $me;
-
- my $atext = '[\-\w !#$%&\'*+/=?^`{|}~]';
-
- foreach $me (@_) {
- my($phrase,$addr,$comment) = @{$me};
- my @tmp = ();
-
- if(defined $phrase && length($phrase)) {
- push @tmp, $phrase =~ /^(?:\s*$atext\s*)+$/ ? $phrase
- : $phrase =~ /(?<!\\)"/ ? $phrase
- : qq("$phrase");
-
- push(@tmp, "<" . $addr . ">") if(defined $addr && length($addr));
- }
- else {
- push(@tmp, $addr) if(defined $addr && length($addr));
- }
- if(defined($comment) && $comment =~ /\S/) {
- $comment =~ s/^\s*\(?/(/;
- $comment =~ s/\)?\s*$/)/;
- }
- push(@tmp, $comment) if(defined $comment && length($comment));
- push(@fmts, join(" ", @tmp)) if(scalar(@tmp));
- }
-
- return join(", ", @fmts);
- }
-
-
- sub name
- {
- my $me = shift;
- my $phrase = $me->phrase;
- my $addr = $me->address;
-
- $phrase = $me->comment unless(defined($phrase) && length($phrase));
- my $name = $me->_extract_name($phrase);
-
- # first.last@domain address
- if($name eq '' && $addr =~ /([^\%\.\@_]+([\._][^\%\.\@_]+)+)[\@\%]/o)
- {
- ($name = $1) =~ s/[\._]+/ /go;
- $name = _extract_name($name);
- }
-
- if($name eq '' && $addr =~ m#/g=#oi)
- # X400 style address
- {
- my ($f) = $addr =~ m#g=([^/]*)#oi;
- my ($l) = $addr =~ m#s=([^/]*)#io;
-
- $name = _extract_name($f . " " . $l);
- }
-
- return length($name) ? $name : undef;
- }
-
-
- sub host {
- my $me = shift;
- my $addr = $me->address || '';
- my $i = rindex($addr,'@');
-
- my $host = ($i >= 0) ? substr($addr,$i+1) : undef;
-
- return $host;
- }
-
-
- sub user {
- my $me = shift;
- my $addr = $me->address;
- my $i = index($addr,'@');
-
- my $user = ($i >= 0) ? substr($addr,0,$i) : $addr;
-
- return $user;
- }
-
-
- sub path {
- return ();
- }
-
-
- sub canon {
- my $me = shift;
- return ($me->host, $me->user, $me->path);
- }
-
- 1;
-
-
- __END__
-
- =head1 NAME
-
- Mail::Address - Parse mail addresses
-
- =head1 SYNOPSIS
-
- use Mail::Address;
-
- my @addrs = Mail::Address->parse($line);
-
- foreach $addr (@addrs) {
- print $addr->format,"\n";
- }
-
- =head1 DESCRIPTION
-
- C<Mail::Address> extracts and manipulates RFC822 compilant email
- addresses. As well as being able to create C<Mail::Address> objects
- in the normal manner, C<Mail::Address> can extract addresses from
- the To and Cc lines found in an email message.
-
- =head1 CONSTRUCTORS
-
- =over 4
-
- =item new( PHRASE, ADDRESS, [ COMMENT ])
-
- Mail::Address->new("Perl5 Porters", "perl5-porters@africa.nicoh.com");
-
- Create a new C<Mail::Address> object which represents an address with the
- elements given. In a message these 3 elements would be seen like:
-
- PHRASE <ADDRESS> (COMMENT)
- ADDRESS (COMMENT)
-
- =item parse( LINE )
-
- Mail::Address->parse($line);
-
- Parse the given line a return a list of extracted C<Mail::Address> objects.
- The line would normally be one taken from a To,Cc or Bcc line in a message
-
- =back
-
- =head1 METHODS
-
- =over 4
-
- =item phrase ()
-
- Return the phrase part of the object.
-
- =item address ()
-
- Return the address part of the object.
-
- =item comment ()
-
- Return the comment part of the object
-
- =item format ()
-
- Return a string representing the address in a suitable form to be placed
- on a To,Cc or Bcc line of a message
-
- =item name ()
-
- Using the information contained within the object attempt to identify what
- the person or groups name is
-
- =item host ()
-
- Return the address excluding the user id and '@'
-
- =item user ()
-
- Return the address excluding the '@' and the mail domain
-
- =item path ()
-
- Unimplemented yet but should return the UUCP path for the message
-
- =item canon ()
-
- Unimplemented yet but should return the UUCP canon for the message
-
- =back
-
- =head1 AUTHOR
-
- Graham Barr. Maintained by Mark Overmeer <mailtools@overmeer.net>
-
- =head1 COPYRIGHT
-
- Copyright (c) 2002-2003 Mark Overmeer, 1995-2001 Graham Barr. All rights
- reserved. This program is free software; you can redistribute it and/or
- modify it under the same terms as Perl itself.
-
- =cut
-
-