home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
ftp.f-secure.com
/
2014.06.ftp.f-secure.com.tar
/
ftp.f-secure.com
/
support
/
hotfix
/
fsis
/
IS-SpamControl.fsfix
/
iufssc
/
lib
/
Mail
/
SpamAssassin
/
EvalTests.pm
< prev
next >
Wrap
Text File
|
2006-11-29
|
94KB
|
3,187 lines
# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at:
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>
package Mail::SpamAssassin::EvalTests;
1;
package Mail::SpamAssassin::PerMsgStatus;
use strict;
use warnings;
use bytes;
use Mail::SpamAssassin::Conf;
use Mail::SpamAssassin::Dns;
use Mail::SpamAssassin::Locales;
use Mail::SpamAssassin::MailingList;
use Mail::SpamAssassin::PerMsgStatus;
use Mail::SpamAssassin::Constants qw(:ip);
use Digest::SHA1 qw(sha1_hex);
use Fcntl;
use File::Path;
use Time::Local;
use File::Basename;
#use constant HAS_DB_FILE => eval { require DB_File; };
use constant HAS_SDBM => eval { require SDBM_File; };
use vars qw{
$CCTLDS_WITH_LOTS_OF_OPEN_RELAYS
$ROUND_THE_WORLD_RELAYERS
$WORD_OBFUSCATION_CHARS
$CHARSETS_LIKELY_TO_FP_AS_CAPS
};
# sad but true. sort it out, sysadmins!
$CCTLDS_WITH_LOTS_OF_OPEN_RELAYS = qr{(?:kr|cn|cl|ar|hk|il|th|tw|sg|za|tr|ma|ua|in|pe|br)};
$ROUND_THE_WORLD_RELAYERS = qr{(?:net|com|ca)};
# Here's how that RE was determined... relay rape by country (as of my
# spam collection on Dec 12 2001):
#
# 10 in 10 ua 11 ma 11 tr 11 za 12 gr
# 13 pl 14 se 15 hu 17 sg 19 dk 19 pt
# 19 th 21 us 22 hk 24 il 26 ch 27 ar
# 27 es 29 cz 32 cl 32 mx 37 nl 38 fr
# 41 it 43 ru 59 au 62 uk 67 br 70 ca
# 104 tw 111 de 123 jp 130 cn 191 kr
#
# However, since some ccTLDs just have more hosts/domains (skewing those
# figures), I cut down this list using data from
# http://www.isc.org/ds/WWW-200107/. I used both hostcount and domain counts
# for figuring this. any ccTLD with > about 40000 domains is left out of this
# regexp. Then I threw in some unscientific seasoning to taste. ;)
$WORD_OBFUSCATION_CHARS = '*_.,/|-+=';
# Charsets which use capital letters heavily in their encoded representation.
$CHARSETS_LIKELY_TO_FP_AS_CAPS = qr{[-_a-z0-9]*(?:
koi|jp|jis|euc|gb|big5|isoir|cp1251|georgianps|pt154|tis
)[-_a-z0-9]*}ix;
###########################################################################
# HEAD TESTS:
###########################################################################
# From and To have same address, but are not exactly the same and
# neither contains intermediate spaces.
sub check_for_from_to_same {
my ($self) = @_;
my $hdr_from = $self->get('From');
my $hdr_to = $self->get('To');
return 0 if (!length($hdr_from) || !length($hdr_to) ||
$hdr_from eq $hdr_to);
my $addr_from = $self->get('From:addr');
my $addr_to = $self->get('To:addr');
# BUG: From:addr and To:addr sometimes contain whitespace
$addr_from =~ s/\s+//g;
$addr_to =~ s/\s+//g;
return 0 if (!length($addr_from) || !length($addr_to) ||
$addr_from ne $addr_to);
if ($hdr_from =~ /^\s*\S+\s*$/ && $hdr_to =~ /^\s*\S+\s*$/) {
return 1;
}
}
sub check_for_matching_env_and_hdr_from {
my ($self) =@_;
# two blank headers match so don't bother checking
return (lc $self->get('EnvelopeFrom:addr') eq lc $self->get('From:addr'));
}
sub sorted_recipients {
my ($self) = @_;
if (!exists $self->{tocc_sorted}) {
$self->_check_recipients();
}
return $self->{tocc_sorted};
}
sub similar_recipients {
my ($self, $min, $max) = @_;
if (!exists $self->{tocc_similar}) {
$self->_check_recipients();
}
return (($min eq 'undef' || $self->{tocc_similar} >= $min) &&
($max eq 'undef' || $self->{tocc_similar} < $max));
}
# best experimentally derived values
use constant TOCC_SORTED_COUNT => 7;
use constant TOCC_SIMILAR_COUNT => 5;
use constant TOCC_SIMILAR_LENGTH => 2;
sub _check_recipients {
my ($self) = @_;
my @inputs;
# ToCc: pseudo-header works best, but sometimes Bcc: is better
for ('ToCc', 'Bcc') {
my $to = $self->get($_); # get recipients
$to =~ s/\(.*?\)//g; # strip out the (comments)
push(@inputs, ($to =~ m/([\w.=-]+\@\w+(?:[\w.-]+\.)+\w+)/g));
last if scalar(@inputs) >= TOCC_SIMILAR_COUNT;
}
# remove duplicate addresses only when they appear next to each other
my @address;
my $previous = '';
while (my $current = shift @inputs) {
push(@address, ($previous = $current)) if lc($current) ne lc($previous);
last if @address == 256;
}
# ideas that had both poor S/O ratios and poor hit rates:
# - testing for reverse sorted recipient lists
# - testing To: and Cc: headers separately
$self->{tocc_sorted} = (scalar(@address) >= TOCC_SORTED_COUNT &&
join(',', @address) eq (join(',', sort @address)));
# a good S/O ratio and hit rate is achieved by comparing 2-byte
# substrings and requiring 5 or more addresses
$self->{tocc_similar} = 0;
if (scalar (@address) >= TOCC_SIMILAR_COUNT) {
my @user = map { substr($_,0,TOCC_SIMILAR_LENGTH) } @address;
my @fqhn = map { m/\@(.*)/ } @address;
my @host = map { substr($_,0,TOCC_SIMILAR_LENGTH) } @fqhn;
my $hits = 0;
my $combinations = 0;
for (my $i = 0; $i <= $#address; $i++) {
for (my $j = $i+1; $j <= $#address; $j++) {
$hits++ if $user[$i] eq $user[$j];
$hits++ if $host[$i] eq $host[$j] && $fqhn[$i] ne $fqhn[$j];
$combinations++;
}
}
$self->{tocc_similar} = $hits / $combinations;
}
}
###########################################################################
# Message-ID for untrusted message was added by a trusted relay
sub message_id_from_mta {
my ($self) = @_;
my $id = $self->get('MESSAGEID');
if ($id && $self->{num_relays_untrusted} > 0) {
for my $rcvd (@{$self->{relays_untrusted}}[0], @{$self->{relays_trusted}})
{
return 1 if $rcvd->{id} && (index(lc($id), lc($rcvd->{id})) != -1);
}
}
return 0;
}
###########################################################################
# FORGED_RCVD_TRAIL
sub check_for_forged_received_trail {
my ($self) = @_;
$self->_check_for_forged_received unless exists $self->{mismatch_from};
return ($self->{mismatch_from} > 1);
}
# FORGED_RCVD_HELO
sub check_for_forged_received_helo {
my ($self) = @_;
$self->_check_for_forged_received unless exists $self->{mismatch_helo};
return ($self->{mismatch_helo} > 0);
}
# FORGED_RCVD_IP_HELO
sub check_for_forged_received_ip_helo {
my ($self) = @_;
$self->_check_for_forged_received unless exists $self->{mismatch_ip_helo};
return ($self->{mismatch_ip_helo} > 0);
}
sub _check_for_forged_received {
my ($self) = @_;
$self->{mismatch_from} = 0;
$self->{mismatch_helo} = 0;
$self->{mismatch_ip_helo} = 0;
my $IP_PRIVATE = IP_PRIVATE;
my @fromip = map { $_->{ip} } @{$self->{relays_untrusted}};
# just pick up domains for these
my @by = map {
hostname_to_domain ($_->{lc_by});
} @{$self->{relays_untrusted}};
my @from = map {
hostname_to_domain ($_->{lc_rdns});
} @{$self->{relays_untrusted}};
my @helo = map {
hostname_to_domain ($_->{lc_helo});
} @{$self->{relays_untrusted}};
for (my $i = 0; $i < $self->{num_relays_untrusted}; $i++) {
next if (!defined $by[$i] || $by[$i] !~ /^\w+(?:[\w.-]+\.)+\w+$/);
if (defined ($from[$i]) && defined($fromip[$i])) {
if ($from[$i] =~ /^localhost(?:\.localdomain)?$/) {
if ($fromip[$i] eq '127.0.0.1') {
# valid: bouncing around inside 1 machine, via the localhost
# interface (freshmeat newsletter does this). TODO: this
# may be obsolete, I think we do this in Received.pm anyway
$from[$i] = undef;
}
}
}
my $frm = $from[$i];
my $hlo = $helo[$i];
my $by = $by[$i];
dbg("eval: forged-HELO: from=".(defined $frm ? $frm : "(undef)").
" helo=".(defined $hlo ? $hlo : "(undef)").
" by=".(defined $by ? $by : "(undef)"));
# note: this code won't catch IP-address HELOs, but we already have
# a separate rule for that anyway.
next unless ($by =~ /^\w+(?:[\w.-]+\.)+\w+$/);
if (defined($hlo) && defined($frm)
&& $hlo =~ /^\w+(?:[\w.-]+\.)+\w+$/
&& $frm =~ /^\w+(?:[\w.-]+\.)+\w+$/
&& $frm ne $hlo && !helo_forgery_whitelisted($frm, $hlo))
{
dbg("eval: forged-HELO: mismatch on HELO: '$hlo' != '$frm'");
$self->{mismatch_helo}++;
}
my $fip = $fromip[$i];
if (defined($hlo) && defined($fip)) {
if ($hlo =~ /^\d+\.\d+\.\d+\.\d+$/
&& $fip =~ /^\d+\.\d+\.\d+\.\d+$/
&& $fip ne $hlo)
{
$hlo =~ /^(\d+\.\d+)\.\d+\.\d+$/; my $hclassb = $1;
$fip =~ /^(\d+\.\d+)\.\d+\.\d+$/; my $fclassb = $1;
# allow private IP addrs here, could be a legit screwup
if ($hclassb && $fclassb &&
$hclassb ne $fclassb &&
!($hlo =~ /$IP_PRIVATE/o))
{
dbg("eval: forged-HELO: massive mismatch on IP-addr HELO: '$hlo' != '$fip'");
$self->{mismatch_ip_helo}++;
}
}
}
my $prev = $from[$i-1];
if (defined($prev) && $i > 0
&& $prev =~ /^\w+(?:[\w.-]+\.)+\w+$/
&& $by ne $prev && !helo_forgery_whitelisted($by, $prev))
{
dbg("eval: forged-HELO: mismatch on from: '$prev' != '$by'");
$self->{mismatch_from}++;
}
}
}
sub helo_forgery_whitelisted {
my ($helo, $rdns) = @_;
if ($helo eq 'msn.com' && $rdns eq 'hotmail.com') { return 1; }
0;
}
sub hostname_to_domain {
my ($hostname) = @_;
if ($hostname !~ /[a-zA-Z]/) { return $hostname; } # IP address
my @parts = split(/\./, $hostname);
if (@parts > 1 && $parts[-1] =~ /(?:\S{3,}|ie|fr|de)/) {
return join('.', @parts[-2..-1]);
}
elsif (@parts > 2) {
return join('.', @parts[-3..-1]);
}
else {
return $hostname;
}
}
# FORGED_HOTMAIL_RCVD
sub _check_for_forged_hotmail_received_headers {
my ($self) = @_;
if (defined $self->{hotmail_addr_but_no_hotmail_received}) { return; }
$self->{hotmail_addr_with_forged_hotmail_received} = 0;
$self->{hotmail_addr_but_no_hotmail_received} = 0;
my $rcvd = $self->get('Received');
$rcvd =~ s/\s+/ /gs; # just spaces, simplify the regexp
return if ($rcvd =~
/from mail pickup service by hotmail\.com with Microsoft SMTPSVC;/);
# Microsoft passes Hotmail mail directly to MSN Group servers.
return if $self->check_for_msn_groups_headers();
my $ip = $self->get('X-Originating-Ip');
my $IP_ADDRESS = IP_ADDRESS;
if ($ip =~ /$IP_ADDRESS/) { $ip = 1; } else { $ip = 0; }
# Hotmail formats its received headers like this:
# Received: from hotmail.com (f135.law8.hotmail.com [216.33.241.135])
# spammers do not ;)
if ($self->gated_through_received_hdr_remover()) { return; }
if ($rcvd =~ /from \S*hotmail.com \(\S+\.hotmail(?:\.msn)?\.com[ \)]/ && $ip)
{ return; }
if ($rcvd =~ /from \S+ by \S+\.hotmail(?:\.msn)?\.com with HTTP\;/ && $ip)
{ return; }
if ($rcvd =~ /from \[66\.218.\S+\] by \S+\.yahoo\.com/ && $ip)
{ return; }
if ($rcvd =~ /(?:from |HELO |helo=)\S*hotmail\.com\b/) {
# HELO'd as hotmail.com, despite not being hotmail
$self->{hotmail_addr_with_forged_hotmail_received} = 1;
} else {
# check to see if From claimed to be @hotmail.com
my $from = $self->get('From:addr');
if ($from !~ /hotmail.com/) { return; }
$self->{hotmail_addr_but_no_hotmail_received} = 1;
}
}
# FORGED_HOTMAIL_RCVD
sub check_for_forged_hotmail_received_headers {
my ($self) = @_;
$self->_check_for_forged_hotmail_received_headers();
return $self->{hotmail_addr_with_forged_hotmail_received};
}
# SEMIFORGED_HOTMAIL_RCVD
sub check_for_no_hotmail_received_headers {
my ($self) = @_;
$self->_check_for_forged_hotmail_received_headers();
return $self->{hotmail_addr_but_no_hotmail_received};
}
# MSN_GROUPS
sub check_for_msn_groups_headers {
my ($self) = @_;
return 0 unless ($self->get('To') =~ /<(\S+)\@groups\.msn\.com>/i);
my $listname = $1;
# from Theo Van Dinter, see
# http://www.hughes-family.org/bugzilla/show_bug.cgi?id=591
# Updated by DOS, based on messages from Bob Menschel, bug 4301
return 0 unless $self->get('Received') =~ /from mail pickup service by ((?:p\d\d\.)groups\.msn\.com)\b/;
my $server = $1;
if ($listname =~ /^notifications$/) {
return 0 unless $self->get('Message-Id') =~ /^<\S+\@$server>/;
} else {
return 0 unless $self->get('Message-Id') =~ /^<$listname-\S+\@groups\.msn\.com>/;
return 0 unless $self->get('EnvelopeFrom') =~ /$listname-bounce\@groups\.msn\.com/;
}
return 1;
# MSN Groups
# Return-path: <ListName-bounce@groups.msn.com>
# Received: from groups.msn.com (tk2dcpuba02.msn.com [65.54.195.210]) by
# dogma.slashnull.org (8.11.6/8.11.6) with ESMTP id g72K35v10457 for
# <zzzzzzzzzzzz@jmason.org>; Fri, 2 Aug 2002 21:03:05 +0100
# Received: from mail pickup service by groups.msn.com with Microsoft
# SMTPSVC; Fri, 2 Aug 2002 13:01:30 -0700
# Message-id: <ListName-1392@groups.msn.com>
# X-loop: notifications@groups.msn.com
# Reply-to: "List Full Name" <ListName@groups.msn.com>
# To: "List Full Name" <ListName@groups.msn.com>
# Return-path: <ListName-bounce@groups.msn.com>
# Received: from p04.groups.msn.com ([65.54.195.216]) etc...
# Received: from mail pickup service by p04.groups.msn.com with Microsoft SMTPSVC;
# Thu, 5 May 2005 20:30:37 -0700
# X-Originating-Ip: 207.68.170.30
# From: =?iso-8859-1?B?IqSj4/D9pEbzeN9s9vLw6qQiIA==?=<zzzzzzzz@hotmail.com>
# To: "Managers of List Name" <notifications@groups.msn.com>
# Subject: =?iso-8859-1?Q?APPROVAL_NEEDED:_=A4=A3=E3=F0=FD=A4F=F3x=DFl?=
# =?iso-8859-1?Q?=F6=F2=F0=EA=A4_applied_to_join_List_Name=2C?=
# =?iso-8859-1?Q?_an_MSN_Group?=
# Date: Thu, 5 May 2005 20:30:37 -0700
# MIME-Version: 1.0
# Content-Type: multipart/alternative;
# boundary="----=_NextPart_000_333944_01C551B1.4BBA02B0"
# X-MimeOLE: Produced By Microsoft MimeOLE V5.50.4927.1200
# Message-ID: <TK2DCPUBA042cv0aGlt00020aa3@p04.groups.msn.com>
# Return-path: <ListName-bounce@groups.msn.com>
# Received: from [65.54.208.83] (helo=p05.groups.msn.com) etc...
# Received: from mail pickup service by p05.groups.msn.com with Microsoft SMTPSVC;
# Fri, 6 May 2005 14:59:25 -0700
# X-Originating-Ip: 207.68.170.30
# Message-Id: <ListName-101@groups.msn.com>
# Reply-To: "List Name" <ListName@groups.msn.com>
# From: "whoever" <zzzzzzzzzz@hotmail.com>
# To: "List Name" <ListName@groups.msn.com>
# Subject: whatever
# Date: Fri, 6 May 2005 14:59:25 -0700
}
###########################################################################
sub check_for_forged_eudoramail_received_headers {
my ($self) = @_;
my $from = $self->get('From:addr');
if ($from !~ /eudoramail.com/) { return 0; }
my $rcvd = $self->get('Received');
$rcvd =~ s/\s+/ /gs; # just spaces, simplify the regexp
my $ip = $self->get('X-Sender-Ip');
my $IP_ADDRESS = IP_ADDRESS;
if ($ip =~ /$IP_ADDRESS/) { $ip = 1; } else { $ip = 0; }
# Eudoramail formats its received headers like this:
# Received: from Unknown/Local ([?.?.?.?]) by shared1-mail.whowhere.com;
# Thu Nov 29 13:44:25 2001
# Message-Id: <JGDHDEHPPJECDAAA@shared1-mail.whowhere.com>
# Organization: QUALCOMM Eudora Web-Mail (http://www.eudoramail.com:80)
# X-Sender-Ip: 192.175.21.146
# X-Mailer: MailCity Service
if ($self->gated_through_received_hdr_remover()) { return 0; }
if ($rcvd =~ /by \S*whowhere.com\;/ && $ip) { return 0; }
return 1;
}
###########################################################################
sub check_for_forged_excite_received_headers {
my ($self) = @_;
my $from = $self->get('From:addr');
if ($from !~ /excite.com/) { return 0; }
my $rcvd = $self->get('Received');
$rcvd =~ s/\s+/ /gs; # just spaces, simplify the regexp
# Excite formats its received headers like this:
# Received: from bucky.excite.com ([198.3.99.218]) by vaxc.cc.monash.edu.au
# (PMDF V6.0-24 #38147) with ESMTP id
# <01K53WHA3OGCA5W9MM@vaxc.cc.monash.edu.au> for luv@luv.asn.au;
# Sat, 23 Jun 2001 13:36:20 +1000
# Received: from hippie.excite.com ([199.172.148.180]) by bucky.excite.com
# (InterMail vM.4.01.02.39 201-229-119-122) with ESMTP id
# <20010623033612.NRCY6361.bucky.excite.com@hippie.excite.com> for
# <luv@luv.asn.au>; Fri, 22 Jun 2001 20:36:12 -0700
# spammers do not ;)
if ($self->gated_through_received_hdr_remover()) { return 0; }
if ($rcvd =~ /from \S*excite.com (\S+) by \S*excite.com/) { return 0; }
return 1;
}
###########################################################################
sub check_for_forged_yahoo_received_headers {
my ($self) = @_;
my $from = $self->get('From:addr');
if ($from !~ /yahoo\.com$/) { return 0; }
my $rcvd = $self->get('Received');
if ($self->get("Resent-From") && $self->get("Resent-To")) {
my $xrcvd = $self->get("X-Received");
$rcvd = $xrcvd if $xrcvd;
}
$rcvd =~ s/\s+/ /gs; # just spaces, simplify the regexp
# not sure about this
#if ($rcvd !~ /from \S*yahoo\.com/) { return 0; }
if ($self->gated_through_received_hdr_remover()) { return 0; }
# bug 3740: ignore bounces from Yahoo!. only honoured if the
# correct rDNS shows up in the trusted relay list, or first untrusted relay
if ($from eq 'MAILER-DAEMON@yahoo.com' &&
($self->{relays_trusted_str} =~ / rdns=\S+\.yahoo\.com /
|| $self->{relays_untrusted_str} =~ /^[^\]]+ rdns=\S+\.yahoo\.com /))
{ return 0; }
if ($rcvd =~ /by web\S+\.mail\S*\.yahoo\.com via HTTP/) { return 0; }
if ($rcvd =~ /by smtp\S+\.yahoo\.com with SMTP/) { return 0; }
my $IP_ADDRESS = IP_ADDRESS;
if ($rcvd =~
/from \[$IP_ADDRESS\] by \S+\.(?:groups|scd|dcn)\.yahoo\.com with NNFMP/) {
return 0;
}
# used in "forward this news item to a friend" links. There's no better
# received hdrs to match on, unfortunately. I'm not sure if the next test is
# still useful, as a result.
#
# search for msgid <20020929140301.451A92940A9@xent.com>, subject "Yahoo!
# News Story - Top Stories", date Sep 29 2002 on
# <http://xent.com/pipermail/fork/> for an example.
#
if ($rcvd =~ /\bmailer\d+\.bulk\.scd\.yahoo\.com\b/
&& $from =~ /\@reply\.yahoo\.com$/) { return 0; }
if ($rcvd =~ /by \w+\.\w+\.yahoo\.com \(\d+\.\d+\.\d+\/\d+\.\d+\.\d+\)(?: with ESMTP)? id \w+/) {
# possibly sent from "mail this story to a friend"
return 0;
}
return 1;
}
sub check_for_forged_juno_received_headers {
my ($self) = @_;
my $from = $self->get('From:addr');
if($from !~ /\bjuno.com/) { return 0; }
if($self->gated_through_received_hdr_remover()) { return 0; }
my $xmailer = $self->get('X-Mailer');
my $xorig = $self->get('X-Originating-IP');
my $rcvd = $self->get('Received');
my $IP_ADDRESS = IP_ADDRESS;
if (!$xorig) { # New style Juno has no X-Originating-IP header, and other changes
if($rcvd !~ /from.*\b(?:juno|untd)\.com.*[\[\(]$IP_ADDRESS[\]\)].*by/
&& $rcvd !~ / cookie\.(?:juno|untd)\.com /) { return 1; }
if($xmailer !~ /Juno /) { return 1; }
} else {
if($rcvd =~ /from.*\bmail\.com.*\[$IP_ADDRESS\].*by/) {
if($xmailer !~ /\bmail\.com/) { return 1; }
} elsif($rcvd =~ /from (webmail\S+\.untd\.com) \(\1 \[$IP_ADDRESS\]\) by/) {
if($xmailer !~ /^Webmail Version \d/) { return 1; }
} else {
return 1;
}
if($xorig !~ /$IP_ADDRESS/) { return 1; }
}
return 0;
}
#Received: from dragnet.sjc.ebay.com (dragnet.sjc.ebay.com [10.6.21.14])
# by bashir.ebay.com (8.10.2/8.10.2) with SMTP id g29JpwB10940
# for <rod@begbie.com>; Sat, 9 Mar 2002 11:51:58 -0800
sub check_for_from_domain_in_received_headers {
my ($self, $domain, $desired) = @_;
if (exists $self->{from_domain_in_received}) {
if (exists $self->{from_domain_in_received}->{$domain}) {
if ($desired eq 'true') {
# See use of '0e0' below for why we force int() here:
return int($self->{from_domain_in_received}->{$domain});
}
else {
# And why we deliberately do NOT use integers here:
return !$self->{from_domain_in_received}->{$domain};
}
}
} else {
$self->{from_domain_in_received} = {};
}
my $from = $self->get('From:addr');
if ($from !~ /\b\Q$domain\E/i) {
# '0e0' is Perl idiom for "true but zero":
$self->{from_domain_in_received}->{$domain} = '0e0';
return 0;
}
my $rcvd = $self->{relays_trusted_str}."\n".$self->{relays_untrusted_str};
if ($rcvd =~ / rdns=\S*\b${domain} [^\]]*by=\S*\b${domain} /) {
$self->{from_domain_in_received}->{$domain} = 1;
return ($desired eq 'true');
}
$self->{from_domain_in_received}->{$domain} = 0;
return ($desired ne 'true');
}
# ezmlm has a very bad habit of removing Received: headers! bad ezmlm.
#
sub gated_through_received_hdr_remover {
my ($self) = @_;
my $txt = $self->get("Mailing-List");
if (defined $txt && $txt =~ /^contact \S+\@\S+\; run by ezmlm$/) {
my $dlto = $self->get("Delivered-To");
my $rcvd = $self->get("Received");
# ensure we have other indicative headers too
if ($dlto =~ /^mailing list \S+\@\S+/ &&
$rcvd =~ /qmail \d+ invoked (?:from network|by .{3,20})\); \d+ ... \d+/)
{
return 1;
}
}
if ($self->get("Received") !~ /\S/) {
# we have no Received headers! These tests cannot run in that case
return 1;
}
# MSN groups removes Received lines. thanks MSN
if ($self->get("Received") =~ /from groups\.msn\.com \(\S+\.msn\.com /) {
return 1;
}
return 0;
}
###########################################################################
# Bug 1133
# Some spammers will, through HELO, tell the server that their machine
# name *is* the relay; don't know why. An example:
# from mail1.mailwizards.com (m448-mp1.cvx1-b.col.dial.ntli.net
# [213.107.233.192])
# by mail1.mailwizards.com
# When this occurs for real, the from name and HELO name will be the
# same, unless the "helo" name is localhost, or the from and by hostsnames
# themselves are localhost
sub _check_received_helos {
my ($self) = @_;
for (my $i = 0; $i < $self->{num_relays_untrusted}; $i++) {
my $rcvd = $self->{relays_untrusted}->[$i];
# Ignore where IP is in private IP space
next if ($rcvd->{ip_private});
my $from_host = $rcvd->{rdns};
my $helo_host = $rcvd->{helo};
my $by_host = $rcvd->{by};
my $no_rdns = $rcvd->{no_reverse_dns};
next unless defined($helo_host);
# Check for a faked dotcom HELO, e.g.
# Received: from mx02.hotmail.com (www.sucasita.com.mx [148.223.251.99])...
# this can be a stronger spamsign than the normal case, since the
# big dotcoms don't screw up their rDNS normally ;), so less FPs.
# Since spammers like sending out their mails from the dotcoms (esp.
# hotmail and AOL) this will catch those forgeries.
#
# allow stuff before the dot-com for both from-name and HELO-name,
# so HELO="outgoing.aol.com" and from="mx34853495.mx.aol.com" works OK.
#
$self->{no_rdns_dotcom_helo} = 0;
if ($helo_host =~ /(?:\.|^)(lycos\.com|lycos\.co\.uk|hotmail\.com
|localhost\.com|excite\.com|caramail\.com
|cs\.com|aol\.com|msn\.com|yahoo\.com|drizzle\.com)$/ix)
{
my $dom = $1;
# ok, let's catch the case where there's *no* reverse DNS there either
if ($no_rdns) {
dbg("eval: Received: no rDNS for dotcom HELO: from=$from_host HELO=$helo_host");
$self->{no_rdns_dotcom_helo} = 1;
}
}
}
} # _check_received_helos()
sub check_for_no_rdns_dotcom_helo {
my ($self) = @_;
if (!exists $self->{no_rdns_dotcom_helo}) { $self->_check_received_helos(@_); }
return $self->{no_rdns_dotcom_helo};
}
###########################################################################
# look for 8-bit and other illegal characters that should be MIME
# encoded, these might want to exempt languages that do not use
# Latin-based alphabets, but only if the user wants it that way
sub check_illegal_chars {
my ($self, $header, $ratio, $count) = @_;
$header .= ":raw" unless ($header eq "ALL" || $header =~ /:raw$/);
my $str = $self->get($header);
return 0 unless $str;
# avoid overlap between tests
if ($header eq "ALL") {
# fix continuation lines, then remove Subject and From
$str =~ s/\n[ \t]+/ /gs;
$str =~ s/^(?:Subject|From):.*$//gm;
}
# count illegal substrings (RFC 2045)
my $illegal = () = ($str =~ /[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\xff]/g);
# minor exemptions for Subject
if ($header eq "Subject:raw") {
# only exempt a single cent sign, pound sign, or registered sign
my $exempt = () = ($str =~ /[\xa2\xa3\xae]/g);
$illegal-- if $exempt == 1;
}
return 0 if (length($str) == 0);
return (($illegal / length($str)) >= $ratio && $illegal >= $count);
}
sub are_more_high_bits_set {
my ($self, $str) = @_;
my $numhis = () = ($str =~ /[\200-\377]/g);
my $numlos = length($str) - $numhis;
($numlos <= $numhis && $numhis > 3);
}
###########################################################################
sub check_for_missing_to_header {
my ($self) = @_;
my $hdr = $self->get('To');
$hdr ||= $self->get('Apparently-To');
return 1 if ($hdr eq '');
return 0;
}
###########################################################################
# Check if the apparent sender (in the last received header) had
# no reverse lookup for it's IP
#
# Look for headers like:
#
# Received: from mx1.eudoramail.com ([204.32.147.84])
sub check_for_sender_no_reverse {
my ($self) = @_;
# Sender received header is the last in the sequence
my $srcvd = $self->{relays_untrusted}->
[$self->{num_relays_untrusted} - 1];
return 0 unless (defined $srcvd);
# Ignore if the from host is domainless (has no dot)
return 0 unless ($srcvd->{rdns} =~ /\./);
# Ignore if the from host is from a private IP range
return 0 if ($srcvd->{ip_private});
return 1;
} # check_for_sender_no_reverse()
###########################################################################
sub check_from_in_list {
my ($self,$list) = @_;
my $list_ref = $self->{conf}{$list};
warn "eval: could not find list $list" unless defined $list_ref;
foreach my $addr (all_from_addrs $self) {
return 1 if _check_whitelist $self $list_ref, $addr;
}
return 0;
}
###########################################################################
sub check_to_in_list {
my ($self,$list) = @_;
my $list_ref = $self->{conf}{$list};
warn "eval: could not find list $list" unless defined $list_ref;
foreach my $addr (all_to_addrs $self) {
return 1 if _check_whitelist $self $list_ref, $addr;
}
return 0;
}
###########################################################################
sub check_from_in_whitelist {
my ($self) = @_;
$self->_check_from_in_whitelist unless exists $self->{from_in_whitelist};
return ($self->{from_in_whitelist} > 0);
}
sub check_forged_in_whitelist {
my ($self) = @_;
$self->_check_from_in_whitelist unless exists $self->{from_in_whitelist};
$self->_check_from_in_default_whitelist unless exists $self->{from_in_default_whitelist};
return ($self->{from_in_whitelist} < 0) && ($self->{from_in_default_whitelist} == 0);
}
sub check_from_in_default_whitelist {
my ($self) = @_;
$self->_check_from_in_default_whitelist unless exists $self->{from_in_default_whitelist};
return ($self->{from_in_default_whitelist} > 0);
}
sub check_forged_in_default_whitelist {
my ($self) = @_;
$self->_check_from_in_default_whitelist unless exists $self->{from_in_default_whitelist};
$self->_check_from_in_whitelist unless exists $self->{from_in_whitelist};
return ($self->{from_in_default_whitelist} < 0) && ($self->{from_in_whitelist} == 0);
}
###########################################################################
sub _check_from_in_whitelist {
my ($self) = @_;
my $found_match = 0;
local ($_);
foreach $_ ($self->all_from_addrs()) {
if ($self->_check_whitelist ($self->{conf}->{whitelist_from}, $_)) {
$self->{from_in_whitelist} = 1;
return;
}
my $wh = $self->_check_whitelist_rcvd ($self->{conf}->{whitelist_from_rcvd}, $_);
if ($wh == 1) {
$self->{from_in_whitelist} = 1;
return;
}
elsif ($wh == -1) {
$found_match = -1;
}
}
$self->{from_in_whitelist} = $found_match;
return;
}
###########################################################################
sub _check_from_in_default_whitelist {
my ($self) = @_;
my $found_match = 0;
local ($_);
foreach $_ ($self->all_from_addrs()) {
my $wh = $self->_check_whitelist_rcvd ($self->{conf}->{def_whitelist_from_rcvd}, $_);
if ($wh == 1) {
$self->{from_in_default_whitelist} = 1;
return;
}
elsif ($wh == -1) {
$found_match = -1;
}
}
$self->{from_in_default_whitelist} = $found_match;
return;
}
###########################################################################
# look up $addr and trusted relays in a whitelist with rcvd
# note if it appears to be a forgery and $addr is not in any-relay list
sub _check_whitelist_rcvd {
my ($self, $list, $addr) = @_;
# we can only match this if we have at least 1 trusted or untrusted header
my $untrusted = $self->{num_relays_untrusted} || 0;
my $trusted = $self->{num_relays_trusted} || 0;
dbg ("rules: no Received: headers -- skipping _check_whitelist_rcvd")
unless ($untrusted + $trusted > 0);
return 0 unless ($untrusted + $trusted > 0);
my @relays = ();
# try the untrusted one first
if ($self->{num_relays_untrusted} > 0) {
@relays = $self->{relays_untrusted}->[0];
}
# then try the trusted ones; the user could have whitelisted a trusted
# relay, totally permitted
# but do not do this if any untrusted relays, to avoid forgery -- bug 4425
if ($self->{num_relays_trusted} > 0 && !$self->{num_relays_untrusted} ) {
push (@relays, @{$self->{relays_trusted}});
}
$addr = lc $addr;
my $found_forged = 0;
foreach my $white_addr (keys %{$list}) {
my $regexp = qr/$list->{$white_addr}{re}/i;
foreach my $domain (@{$list->{$white_addr}{domain}}) {
if ($addr =~ $regexp) {
foreach my $lastunt (@relays) {
my $rdns = $lastunt->{lc_rdns};
if ($rdns =~ /(?:^|\.)\Q${domain}\E$/i) {
dbg("rules: address $addr matches (def_)whitelist_from_rcvd $list->{$white_addr}{re} ${domain}");
return 1;
}
dbg ("rules: domain '$domain' not matched in rdns '$rdns'");
}
# found address match but no relay match. note as possible forgery
$found_forged = -1;
}
}
}
if ($found_forged) { # might be forgery. check if in list of exempted
my $wlist = $self->{conf}->{whitelist_allows_relays};
foreach my $fuzzy_addr (values %{$wlist}) {
if ($addr =~ /$fuzzy_addr/i) {
$found_forged = 0;
last;
}
}
}
return $found_forged;
}
###########################################################################
sub _check_whitelist {
my ($self, $list, $addr) = @_;
$addr = lc $addr;
if (defined ($list->{$addr})) { return 1; }
study $addr;
foreach my $regexp (values %{$list}) {
if ($addr =~ qr/$regexp/i) {
dbg("rules: address $addr matches whitelist or blacklist regexp: $regexp");
return 1;
}
}
return 0;
}
sub all_from_addrs {
my ($self) = @_;
if (exists $self->{all_from_addrs}) { return @{$self->{all_from_addrs}}; }
my @addrs;
# Resent- headers take priority, if present. see bug 672
# http://www.hughes-family.org/bugzilla/show_bug.cgi?id=672
my $resent = $self->get('Resent-From');
if (defined $resent && $resent =~ /\S/) {
@addrs = $self->{main}->find_all_addrs_in_line ($resent);
}
else {
# bug 2292: Used to use find_all_addrs_in_line() with the same
# headers, but the would catch addresses in comments which caused
# FNs for things like whitelist_from. Since all of these are From
# headers, there should only be 1 address in each anyway, so use the
# :addr code...
# bug 3366: some addresses come in as 'foo@bar...', which is invalid.
# so deal with the multiple periods.
@addrs = grep { defined($_) && length($_) > 0 } map { tr/././s; $_; }
($self->get('From:addr'), # std
$self->get('Envelope-Sender:addr'), # qmail: new-inject(1)
$self->get('Resent-Sender:addr'), # procmailrc manpage
$self->get('X-Envelope-From:addr'), # procmailrc manpage
$self->get('EnvelopeFrom:addr')); # SMTP envelope
# http://www.cs.tut.fi/~jkorpela/headers.html is useful here
}
# Remove duplicate addresses
my %addrs = map { $_ => 1 } @addrs;
@addrs = keys %addrs;
dbg("eval: all '*From' addrs: " . join(" ", @addrs));
$self->{all_from_addrs} = \@addrs;
return @addrs;
}
sub all_to_addrs {
my ($self) = @_;
if (exists $self->{all_to_addrs}) { return @{$self->{all_to_addrs}}; }
my @addrs;
# Resent- headers take priority, if present. see bug 672
# http://www.hughes-family.org/bugzilla/show_bug.cgi?id=672
my $resent = $self->get('Resent-To') . $self->get('Resent-Cc');
if (defined $resent && $resent =~ /\S/) {
@addrs = $self->{main}->find_all_addrs_in_line (
$self->get('Resent-To') . # std, rfc822
$self->get('Resent-Cc')); # std, rfc822
} else {
# OK, a fetchmail trick: try to find the recipient address from
# the most recent 3 Received lines. This is required for sendmail,
# since it does not add a helpful header like exim, qmail
# or Postfix do.
#
my $rcvd = $self->get('Received');
$rcvd =~ s/\n[ \t]+/ /gs;
$rcvd =~ s/\n+/\n/gs;
my @rcvdlines = split(/\n/, $rcvd, 4); pop @rcvdlines; # forget last one
my @rcvdaddrs = ();
foreach my $line (@rcvdlines) {
if ($line =~ / for (\S+\@\S+);/) { push (@rcvdaddrs, $1); }
}
@addrs = $self->{main}->find_all_addrs_in_line (
join(" ", @rcvdaddrs)."\n" .
$self->get('To') . # std
$self->get('Apparently-To') . # sendmail, from envelope
$self->get('Delivered-To') . # Postfix, poss qmail
$self->get('Envelope-Recipients') . # qmail: new-inject(1)
$self->get('Apparently-Resent-To') . # procmailrc manpage
$self->get('X-Envelope-To') . # procmailrc manpage
$self->get('Envelope-To') . # exim
$self->get('X-Delivered-To') . # procmail quick start
$self->get('X-Original-To') . # procmail quick start
$self->get('X-Rcpt-To') . # procmail quick start
$self->get('X-Real-To') . # procmail quick start
$self->get('Cc')); # std
# those are taken from various sources; thanks to Nancy McGough, who
# noted some in <http://www.ii.com/internet/robots/procmail/qs/#envelope>
}
dbg("eval: all '*To' addrs: " . join(" ", @addrs));
$self->{all_to_addrs} = \@addrs;
return @addrs;
# http://www.cs.tut.fi/~jkorpela/headers.html is useful here, also
# http://www.exim.org/pipermail/exim-users/Week-of-Mon-20001009/021672.html
}
###########################################################################
sub check_obfuscated_words {
my ($self, $body) = @_;
foreach my $line (@$body) {
while ($line =~ /[\w$WORD_OBFUSCATION_CHARS]/) {
# TODO, it seems ;)
}
}
}
sub check_unique_words {
my ($self, $body, $m, $b) = @_;
if (!defined $self->{unique_words_repeat}) {
$self->_check_unique_words($body);
}
# y = mx+b where y is number of unique words needed
my $unique = $self->{unique_words_unique};
my $repeat = $self->{unique_words_repeat};
my $y = ($unique + $repeat) * $m + $b;
return ($unique > $y);
}
sub _check_unique_words {
my ($self, $body) = @_;
$self->{unique_words_repeat} = 0;
$self->{unique_words_unique} = 0;
my %count;
for (@$body) {
# copy to avoid changing @$body
my $line = $_;
# from tokenize_line in Bayes.pm
$line =~ tr/-A-Za-z0-9,\@\*\!_'"\$.\241-\377 / /cs;
$line =~ s/(\w)(\.{3,6})(\w)/$1 $2 $3/gs;
$line =~ s/(\w)(\-{2,6})(\w)/$1 $2 $3/gs;
$line =~ s/(?:^|\.\s+)([A-Z])([^A-Z]+)(?:\s|$)/ ' '.(lc $1).$2.' '/ge;
for my $token (split(' ', $line)) {
$count{$token}++;
}
}
$self->{unique_words_unique} = scalar grep { $_ == 1 } values(%count);
$self->{unique_words_repeat} = scalar keys(%count) - $self->{unique_words_unique};
}
###########################################################################
sub check_from_in_blacklist {
my ($self) = @_;
local ($_);
foreach $_ ($self->all_from_addrs()) {
if ($self->_check_whitelist ($self->{conf}->{blacklist_from}, $_)) {
return 1;
}
}
}
sub check_to_in_blacklist {
my ($self) = @_;
local ($_);
foreach $_ ($self->all_to_addrs()) {
if ($self->_check_whitelist ($self->{conf}->{blacklist_to}, $_)) {
return 1;
}
}
}
###########################################################################
# added by DJ
sub check_to_in_whitelist {
my ($self) = @_;
local ($_);
foreach $_ ($self->all_to_addrs()) {
if ($self->_check_whitelist ($self->{conf}->{whitelist_to}, $_)) {
return 1;
}
}
}
###########################################################################
# added by DJ
sub check_to_in_more_spam {
my ($self) = @_;
local ($_);
foreach $_ ($self->all_to_addrs()) {
if ($self->_check_whitelist ($self->{conf}->{more_spam_to}, $_)) {
return 1;
}
}
}
###########################################################################
# added by DJ
sub check_to_in_all_spam {
my ($self) = @_;
local ($_);
foreach $_ ($self->all_to_addrs()) {
if ($self->_check_whitelist ($self->{conf}->{all_spam_to}, $_)) {
return 1;
}
}
}
###########################################################################
sub check_rbl_backend {
my ($self, $rule, $set, $rbl_server, $type, $subtest) = @_;
local ($_);
# First check that DNS is available, if not do not perform this check
return 0 if $self->{conf}->{skip_rbl_checks};
return 0 unless $self->is_dns_available();
$self->load_resolver();
if (($rbl_server !~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/) &&
(index($rbl_server, '.') >= 0) &&
($rbl_server !~ /\.$/)) {
$rbl_server .= ".";
}
dbg("dns: checking RBL $rbl_server, set $set");
# ok, make a list of all the IPs in the untrusted set
my @fullips = map { $_->{ip} } @{$self->{relays_untrusted}};
# now, make a list of all the IPs in the external set, for use in
# notfirsthop testing. this will often be more IPs than found
# in @fullips. It includes the IPs that are trusted, but
# not in internal_networks.
my @fullexternal = map {
(!$_->{internal}) ? ($_->{ip}) : ()
} @{$self->{relays_trusted}};
push (@fullexternal, @fullips); # add untrusted set too
# Make sure a header significantly improves results before adding here
# X-Sender-Ip: could be worth using (very low occurance for me)
# X-Sender: has a very low bang-for-buck for me
my $IP_ADDRESS = IP_ADDRESS;
my @originating = ();
for my $header ('X-Originating-IP', 'X-Apparently-From') {
my $str = $self->get($header);
next unless $str;
push (@originating, ($str =~ m/($IP_ADDRESS)/g));
}
# Let's go ahead and trim away all private ips (KLC)
# also uniq the list and strip dups. (jm)
my @ips = $self->ip_list_uniq_and_strip_private(@fullips);
# if there's no untrusted IPs, it means we trust all the open-internet
# relays, so we can return right now.
return 0 unless (scalar @ips + scalar @originating > 0);
dbg("dns: IPs found: full-external: ".join(", ", @fullexternal).
" untrusted: ".join(", ", @ips).
" originating: ".join(", ", @originating));
my $trusted = $self->{conf}->{trusted_networks};
if (scalar @ips + scalar @originating > 0) {
# If name is foo-notfirsthop, check all addresses except for
# the originating one. Suitable for use with dialup lists, like the PDL.
# note that if there's only 1 IP in the untrusted set, do NOT pop the
# list, since it'd remove that one, and a legit user is supposed to
# use their SMTP server (ie. have at least 1 more hop)!
# If name is foo-lastexternal, check only the Received header just before
# it enters our internal networks; we can trust it and it's the one that
# passed mail between networks
if ($set =~ /-(notfirsthop|lastexternal)$/)
{
# use the external IP set, instead of the trusted set; the user may have
# specified some third-party relays as trusted. Also, don't use
# @originating; those headers are added by a phase of relaying through
# a server like Hotmail, which is not going to be in dialup lists anyway.
@ips = $self->ip_list_uniq_and_strip_private(@fullexternal);
if ($1 eq "lastexternal") {
@ips = (defined $ips[0]) ? ($ips[0]) : ();
} else {
pop @ips if (scalar @ips > 1);
}
}
# If name is foo-firsttrusted, check only the Received header just
# after it enters our trusted networks; that's the only one we can
# trust the IP address from (since our relay added that header).
# And if name is foo-untrusted, check any untrusted IP address.
elsif ($set =~ /-(first|un)trusted$/)
{
my @tips = ();
foreach my $ip (@originating) {
if ($ip && !$trusted->contains_ip($ip)) {
push(@tips, $ip);
}
}
@ips = $self->ip_list_uniq_and_strip_private (@ips, @tips);
if ($1 eq "first") {
@ips = (defined $ips[0]) ? ($ips[0]) : ();
} else {
shift @ips;
}
}
else
{
my @tips = ();
foreach my $ip (@originating) {
if ($ip && !$trusted->contains_ip($ip)) {
push(@tips, $ip);
}
}
# add originating IPs as untrusted IPs (if they are untrusted)
@ips = reverse $self->ip_list_uniq_and_strip_private (@ips, @tips);
# How many IPs max you check in the received lines
my $checklast=$self->{conf}->{num_check_received};
if (scalar @ips > $checklast) {
splice (@ips, $checklast); # remove all others
}
}
}
dbg("dns: only inspecting the following IPs: ".join(", ", @ips));
eval {
foreach my $ip (@ips) {
next unless ($ip =~ /(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})/);
$self->do_rbl_lookup($rule, $set, $type, $rbl_server,
"$4.$3.$2.$1.$rbl_server", $subtest);
}
};
# note that results are not handled here, hits are handled directly
# as DNS responses are harvested
return 0;
}
sub check_rbl {
my ($self, $rule, $set, $rbl_server, $subtest) = @_;
$self->check_rbl_backend($rule, $set, $rbl_server, 'A', $subtest);
}
sub check_rbl_txt {
my ($self, $rule, $set, $rbl_server, $subtest) = @_;
$self->check_rbl_backend($rule, $set, $rbl_server, 'TXT', $subtest);
}
# run for first message
sub check_rbl_sub {
my ($self, $rule, $set, $subtest) = @_;
return 0 if $self->{conf}->{skip_rbl_checks};
return 0 unless $self->is_dns_available();
$self->register_rbl_subtest($rule, $set, $subtest);
}
# backward compatibility
sub check_rbl_results_for {
#warn "dns: check_rbl_results_for() is deprecated, use check_rbl_sub()\n";
check_rbl_sub(@_);
}
# this only checks the address host name and not the domain name because
# using the domain name had much worse results for dsn.rfc-ignorant.org
sub check_rbl_from_host {
_check_rbl_addresses(@_, $_[0]->all_from_addrs());
}
# this only checks the address host name and not the domain name because
# using the domain name had much worse results for dsn.rfc-ignorant.org
sub check_rbl_envfrom {
_check_rbl_addresses(@_, $_[0]->get('EnvelopeFrom:addr'));
}
sub _check_rbl_addresses {
my ($self, $rule, $set, $rbl_server, @addresses) = @_;
return 0 if $self->{conf}->{skip_rbl_checks};
return 0 unless $self->is_dns_available();
my %hosts;
for my $address (@addresses) {
if ($address =~ m/\@(\S+\.\S+)/) {
$hosts{lc($1)} = 1;
}
}
return unless scalar keys %hosts;
$self->load_resolver();
if (($rbl_server !~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/) &&
(index($rbl_server, '.') >= 0) &&
($rbl_server !~ /\.$/)) {
$rbl_server .= ".";
}
dbg("dns: _check_rbl_addresses RBL $rbl_server, set $set");
for my $host (keys %hosts) {
$self->do_rbl_lookup($rule, $set, 'A', $rbl_server, "$host.$rbl_server");
}
}
sub check_dns_sender {
my ($self, $rule) = @_;
my $host;
for my $from ($self->get('EnvelopeFrom:addr')) {
next unless defined $from;
$from =~ tr/././s; # bug 3366
if ($from =~ /\@(\S+\.\S+)/) {
$host = lc($1);
last;
}
}
return 0 unless defined $host;
# First check that DNS is available, if not do not perform this check
# TODO: need a way to skip DNS checks as a whole in configuration
return 0 unless $self->is_dns_available();
$self->load_resolver();
if ($host eq 'compiling.spamassassin.taint.org') {
# only used when compiling
return 0;
}
dbg("dns: checking A and MX for host $host");
$self->do_dns_lookup($rule, 'A', $host);
$self->do_dns_lookup($rule, 'MX', $host);
# cache name of host for later checking
$self->{sender_host} = $host;
return 0;
}
# interface called by SPF plugin
sub check_for_from_dns {
my ($self) = @_;
if (defined $self->{sender_host_fail}) {
return ($self->{sender_host_fail} == 2); # both MX and A need to fail
}
}
sub ip_list_uniq_and_strip_private {
my ($self, @origips) = @_;
my @ips = ();
my %seen = ();
my $IP_PRIVATE = IP_PRIVATE;
foreach my $ip (@origips) {
next unless $ip;
next if (exists ($seen{$ip})); $seen{$ip} = 1;
next if ($ip =~ /$IP_PRIVATE/o);
push(@ips, $ip);
}
return @ips;
}
###########################################################################
sub check_for_unique_subject_id {
my ($self) = @_;
local ($_);
$_ = lc $self->get('Subject');
study;
my $id = 0;
if (/[-_\.\s]{7,}([-a-z0-9]{4,})$/
|| /\s{10,}(?:\S\s)?(\S+)$/
|| /\s{3,}[-:\#\(\[]+([-a-z0-9]{4,})[\]\)]+$/
|| /\s{3,}[:\#\(\[]*([a-f0-9]{4,})[\]\)]*$/
|| /\s{3,}[-:\#]([a-z0-9]{5,})$/
|| /[\s._]{3,}([^0\s._]\d{3,})$/
|| /[\s._]{3,}\[(\S+)\]$/
# (7217vPhZ0-478TLdy5829qicU9-0@26) and similar
|| /\(([-\w]{7,}\@\d+)\)$/
# Seven or more digits at the end of a subject is almost certainly a id
|| /\b(\d{7,})\s*$/
# stuff at end of line after "!" or "?" is usually an id
|| /[!\?]\s*(\d{4,}|\w+(-\w+)+)\s*$/
# 9095IPZK7-095wsvp8715rJgY8-286-28 and similar
# excluding 'Re:', etc and the first word
|| /(?:\w{2,3}:\s)?\w+\s+(\w{7,}-\w{7,}(-\w+)*)\s*$/
# #30D7 and similar
|| /\s#\s*([a-f0-9]{4,})\s*$/
)
{
$id = $1;
# exempt online purchases
if ($id =~ /\d{5,}/
&& /(?:item|invoice|order|number|confirmation).{1,6}\Q$id\E\s*$/)
{
$id = 0;
}
# for the "foo-bar-baz" case, otherwise it won't
# be found in the dict:
$id =~ s/-//;
}
return ($id && !$self->word_is_in_dictionary($id));
}
# word_is_in_dictionary()
#
# See if the word looks like an English word, by checking if each triplet
# of letters it contains is one that can be found in the English language.
# Does not include triplets only found in proper names, or in the Latin
# and Greek terms that might be found in a larger dictionary
my %triplets = ();
my $triplets_loaded = 0;
sub word_is_in_dictionary {
my ($self, $word) = @_;
local ($_);
local $/ = "\n"; # Ensure $/ is set appropriately
# $word =~ tr/A-Z/a-z/; # already done by this stage
$word =~ s/^\s+//;
$word =~ s/\s+$//;
# If it contains a digit, dash, etc, it's not a valid word.
# Don't reject words like "can't" and "I'll"
return 0 if ($word =~ /[^a-z\']/);
# handle a few common "blah blah blah (comment)" styles
return 1 if ($word eq "ot"); # off-topic
return 1 if ($word =~ /(?:linux|nix|bsd)/); # not in most dicts
return 1 if ($word =~ /(?:whew|phew|attn|tha?nx)/); # not in most dicts
my $word_len = length($word);
# Unique IDs probably aren't going to be only one or two letters long
return 1 if ($word_len < 3);
if (!$triplets_loaded) {
my $filename = $self->{main}->find_rule_support_file('triplets.txt');
if (!defined $filename) {
dbg("eval: failed to locate the triplets.txt file");
return 1;
}
if (!open (TRIPLETS, "<$filename")) {
dbg("eval: failed to open '$filename', cannot check dictionary");
return 1;
}
while(<TRIPLETS>) {
chomp;
$triplets{$_} = 1;
}
close(TRIPLETS);
$triplets_loaded = 1;
} # if (!$triplets_loaded)
my $i;
for ($i = 0; $i < ($word_len - 2); $i++) {
my $triplet = substr($word, $i, 3);
if (!$triplets{$triplet}) {
dbg("eval: unique ID: letter triplet '$triplet' from word '$word' not valid");
return 0;
}
} # for ($i = 0; $i < ($word_len - 2); $i++)
# All letter triplets in word were found to be valid
return 1;
}
sub get_address_commonality_ratio {
my ($self, $addr1, $addr2) = @_;
# Ignore "@" and ".". "@" will always be the same in both, and the
# number of "." will almost always be the same
$addr1 =~ s/[\@\.]//g;
$addr2 =~ s/[\@\.]//g;
my %counts1 = ();
my %counts2 = ();
foreach (split(//, lc $addr1)) {
$counts1{$_}++;
}
foreach (split(//, lc $addr2)) {
$counts2{$_}++;
}
my $different = 0;
my $same = 0;
my $unique = 0;
my $char;
my @chars = keys %counts1;
# Extract unique characters, and make the two hashes have the same
# set of keys
foreach $char (@chars) {
if (!defined ($counts2{$char})) {
$unique += $counts1{$char};
delete ($counts1{$char});
}
}
@chars = keys %counts2;
foreach $char (@chars) {
if (!defined ($counts1{$char})) {
$unique += $counts2{$char};
delete ($counts2{$char});
}
}
# Hashes now have identical sets of keys; count the differences
# between the values.
@chars = keys %counts1;
foreach $char (@chars) {
my $count1 = $counts1{$char} || 0.0;
my $count2 = $counts2{$char} || 0.0;
if ($count1 == $count2) {
$same += $count1;
}
else {
$different += abs($count1 - $count2);
}
}
$different += $unique / 2.0;
$same ||= 1.0;
my $ratio = $different / $same;
#print STDERR "addrcommonality $addr1/$addr2($different<$unique>/$same)"
# . " = $ratio\n";
return $ratio;
}
###########################################################################
sub check_for_forged_gw05_received_headers {
my ($self) = @_;
local ($_);
my $rcv = $self->get('Received');
# e.g.
# Received: from mail3.icytundra.com by gw05 with ESMTP; Thu, 21 Jun 2001 02:28:32 -0400
my ($h1, $h2) = ($rcv =~
m/\nfrom\s(\S+)\sby\s(\S+)\swith\sESMTP\;\s+\S\S\S,\s+\d+\s+\S\S\S\s+
\d{4}\s+\d\d:\d\d:\d\d\s+[-+]*\d{4}\n$/xs);
if (defined ($h1) && defined ($h2) && $h2 !~ /\./) {
return 1;
}
0;
}
###########################################################################
sub check_for_faraway_charset {
my ($self, $body) = @_;
my $type = $self->get('Content-Type');
my @locales = $self->get_my_locales();
return 0 if grep { $_ eq "all" } @locales;
$type = get_charset_from_ct_line ($type);
if (defined $type &&
!Mail::SpamAssassin::Locales::is_charset_ok_for_locales
($type, @locales))
{
# sanity check. Some charsets (e.g. koi8-r) include the ASCII
# 7-bit charset as well, so make sure we actually have a high
# number of 8-bit chars in the body text first.
$body = join("\n", @$body);
if ($self->are_more_high_bits_set ($body)) {
return 1;
}
}
0;
}
sub check_for_faraway_charset_in_headers {
my ($self) = @_;
my $hdr;
my @locales = $self->get_my_locales();
return 0 if grep { $_ eq "all" } @locales;
for my $h (qw(From Subject)) {
my @hdrs = $self->get("$h:raw");
if ($#hdrs >= 0) {
$hdr = join(" ", @hdrs);
} else {
$hdr = '';
}
while ($hdr =~ /=\?(.+?)\?.\?.*?\?=/g) {
Mail::SpamAssassin::Locales::is_charset_ok_for_locales($1, @locales)
or return 1;
}
}
0;
}
sub get_charset_from_ct_line {
my $type = shift;
if ($type =~ /charset="([^"]+)"/i) { return $1; }
if ($type =~ /charset='([^']+)'/i) { return $1; }
if ($type =~ /charset=(\S+)/i) { return $1; }
return undef;
}
sub get_my_locales {
my ($self) = @_;
my @locales = split(' ', $self->{conf}->{ok_locales});
my $lang = $ENV{'LC_ALL'};
$lang ||= $ENV{'LANGUAGE'};
$lang ||= $ENV{'LC_MESSAGES'};
$lang ||= $ENV{'LANG'};
push (@locales, $lang) if defined($lang);
return @locales;
}
###########################################################################
sub _check_for_round_the_world_received {
my ($self) = @_;
my ($relayer, $relayerip, $relay);
$self->{round_the_world_revdns} = 0;
$self->{round_the_world_helo} = 0;
my $rcvd = $self->get('Received');
my $IPV4_ADDRESS = IPV4_ADDRESS;
# TODO: use new Received header parser
# trad sendmail/postfix fmt:
# Received: from hitower.parkgroup.ru (unknown [212.107.207.26]) by
# mail.netnoteinc.com (Postfix) with ESMTP id B8CAC11410E for
# <me@netnoteinc.com>; Fri, 30 Nov 2001 02:42:05 +0000 (Eire)
# Received: from fmx1.freemail.hu ([212.46.197.200]) by hitower.parkgroup.ru
# (Lotus Domino Release 5.0.8) with ESMTP id 2001113008574773:260 ;
# Fri, 30 Nov 2001 08:57:47 +1000
if ($rcvd =~ /
\nfrom\b.{0,20}\s(\S+\.${CCTLDS_WITH_LOTS_OF_OPEN_RELAYS})\s\(.{0,200}
\nfrom\b.{0,20}\s([-_A-Za-z0-9.]+)\s.{0,30}\[($IPV4_ADDRESS)\]
/osix) { $relay = $1; $relayer = $2; $relayerip = $3; goto gotone; }
return 0;
gotone:
my $revdns = $self->lookup_ptr ($relayerip);
if (!defined $revdns) { $revdns = '(unknown)'; }
dbg("eval: round-the-world: mail relayed through $relay by ".
"$relayerip (HELO $relayer, rev DNS says $revdns)");
if ($revdns =~ /\.${ROUND_THE_WORLD_RELAYERS}$/oi) {
dbg("eval: round-the-world: yep, I think so (from rev dns)");
$self->{round_the_world_revdns} = 1;
return;
}
if ($relayer =~ /\.${ROUND_THE_WORLD_RELAYERS}$/oi) {
dbg("eval: round-the-world: yep, I think so (from HELO)");
$self->{round_the_world_helo} = 1;
return;
}
dbg("eval: round-the-world: probably not");
return;
}
sub check_for_round_the_world_received_helo {
my ($self) = @_;
if (!defined $self->{round_the_world_helo}) {
$self->_check_for_round_the_world_received();
}
if ($self->{round_the_world_helo}) { return 1; }
return 0;
}
sub check_for_round_the_world_received_revdns {
my ($self) = @_;
if (!defined $self->{round_the_world_revdns}) {
$self->_check_for_round_the_world_received();
}
if ($self->{round_the_world_revdns}) { return 1; }
return 0;
}
###########################################################################
sub check_for_shifted_date {
my ($self, $min, $max) = @_;
if (!exists $self->{date_diff}) {
$self->_check_date_diff();
}
return (($min eq 'undef' || $self->{date_diff} >= (3600 * $min)) &&
($max eq 'undef' || $self->{date_diff} < (3600 * $max)));
}
sub received_within_months {
# filters out some false positives in old corpus mail - Allen
my ($self,$min,$max) = @_;
if (!exists($self->{date_received})) {
$self->_check_date_received();
}
my $diff = time() - $self->{date_received};
# 365.2425 * 24 * 60 * 60 = 31556952 = seconds in year (including leap)
if (((! defined($min)) || ($min eq 'undef') ||
($diff >= (31556952 * ($min/12)))) &&
((! defined($max)) || ($max eq 'undef') ||
($diff < (31556952 * ($max/12))))) {
return 1;
} else {
return 0;
}
}
sub _get_date_header_time {
my $self = $_[0];
my $time;
# a Resent-Date: header takes precedence over any Date: header
DATE: for my $header ('Resent-Date', 'Date') {
my @dates = $self->{msg}->get_header($header);
for my $date (@dates) {
if (defined($date) && length($date)) {
chomp($date);
$time = Mail::SpamAssassin::Util::parse_rfc822_date($date);
}
last DATE if defined($time);
}
}
if (defined($time)) {
$self->{date_header_time} = $time;
}
else {
$self->{date_header_time} = undef;
}
}
sub _get_received_header_times {
my $self = $_[0];
$self->{received_header_times} = [ () ];
$self->{received_fetchmail_time} = undef;
my (@received);
my $received = $self->get('Received');
if (defined($received) && length($received)) {
@received = grep {$_ =~ m/\S/} (split(/\n/,$received));
}
# if we have no Received: headers, chances are we're archived mail
# with a limited set of headers
if (!scalar(@received)) {
return;
}
# handle fetchmail headers
my (@local);
if (($received[0] =~
m/\bfrom (?:localhost\s|(?:\S+ ){1,2}\S*\b127\.0\.0\.1\b)/) ||
($received[0] =~ m/qmail \d+ invoked by uid \d+/)) {
push @local, (shift @received);
}
if (scalar(@received) &&
($received[0] =~ m/\bby localhost with \w+ \(fetchmail-[\d.]+/)) {
push @local, (shift @received);
}
elsif (scalar(@local)) {
unshift @received, (shift @local);
}
my $rcvd;
if (scalar(@local)) {
my (@fetchmail_times);
foreach $rcvd (@local) {
if ($rcvd =~ m/(\s.?\d+ \S\S\S \d+ \d+:\d+:\d+ \S+)/) {
my $date = $1;
dbg("eval: trying Received fetchmail header date for real time: $date");
my $time = Mail::SpamAssassin::Util::parse_rfc822_date($date);
if (defined($time) && (time() >= $time)) {
dbg("eval: time_t from date=$time, rcvd=$date");
push @fetchmail_times, $time;
}
}
}
if (scalar(@fetchmail_times) > 1) {
$self->{received_fetchmail_time} =
(sort {$b <=> $a} (@fetchmail_times))[0];
} elsif (scalar(@fetchmail_times)) {
$self->{received_fetchmail_time} = $fetchmail_times[0];
}
}
my (@header_times);
foreach $rcvd (@received) {
if ($rcvd =~ m/(\s.?\d+ \S\S\S \d+ \d+:\d+:\d+ \S+)/) {
my $date = $1;
dbg("eval: trying Received header date for real time: $date");
my $time = Mail::SpamAssassin::Util::parse_rfc822_date($date);
if (defined($time)) {
dbg("eval: time_t from date=$time, rcvd=$date");
push @header_times, $time;
}
}
}
if (scalar(@header_times)) {
$self->{received_header_times} = [ @header_times ];
} else {
dbg("eval: no dates found in Received headers");
}
}
sub _check_date_received {
my $self = $_[0];
my (@dates_poss);
$self->{date_received} = 0;
if (!exists($self->{date_header_time})) {
$self->_get_date_header_time();
}
if (defined($self->{date_header_time})) {
push @dates_poss, $self->{date_header_time};
}
if (!exists($self->{received_header_times})) {
$self->_get_received_header_times();
}
my (@received_header_times) = @{ $self->{received_header_times} };
if (scalar(@received_header_times)) {
push @dates_poss, $received_header_times[0];
}
if (defined($self->{received_fetchmail_time})) {
push @dates_poss, $self->{received_fetchmail_time};
}
if (defined($self->{date_header_time}) && scalar(@received_header_times)) {
if (!exists($self->{date_diff})) {
$self->_check_date_diff();
}
push @dates_poss, $self->{date_header_time} - $self->{date_diff};
}
if (scalar(@dates_poss)) { # use median
$self->{date_received} = (sort {$b <=> $a}
(@dates_poss))[int($#dates_poss/2)];
dbg("eval: date chosen from message: " .
scalar(localtime($self->{date_received})));
} else {
dbg("eval: no dates found in message");
}
}
sub _check_date_diff {
my $self = $_[0];
$self->{date_diff} = 0;
if (!exists($self->{date_header_time})) {
$self->_get_date_header_time();
}
if (!defined($self->{date_header_time})) {
return; # already have tests for this
}
if (!exists($self->{received_header_times})) {
$self->_get_received_header_times();
}
my (@header_times) = @{ $self->{received_header_times} };
if (!scalar(@header_times)) {
return; # archived mail?
}
my (@diffs) = map {$self->{date_header_time} - $_} (@header_times);
# if the last Received: header has no difference, then we choose to
# exclude it
if ($#diffs > 0 && $diffs[$#diffs] == 0) {
pop(@diffs);
}
# use the date with the smallest absolute difference
# (experimentally, this results in the fewest false positives)
@diffs = sort { abs($a) <=> abs($b) } @diffs;
$self->{date_diff} = $diffs[0];
}
###########################################################################
sub subject_is_all_caps {
my ($self) = @_;
my $subject = $self->get('Subject');
$subject =~ s/^\s+//;
$subject =~ s/\s+$//;
return 0 if $subject !~ /\s/; # don't match one word subjects
return 0 if (length $subject < 10); # don't match short subjects
$subject =~ s/[^a-zA-Z]//g; # only look at letters
# now, check to see if the subject is encoded using a non-ASCII charset.
# If so, punt on this test to avoid FPs. We just list the known charsets
# this test will FP on, here.
my $subjraw = $self->get('Subject:raw');
if ($subjraw =~ /=\?${CHARSETS_LIKELY_TO_FP_AS_CAPS}\?/i) {
return 0;
}
return length($subject) && ($subject eq uc($subject));
}
###########################################################################
# check an RBL if the message contains an "accreditor assertion,"
# that is, the message contains the name of a service that will vouch
# for their practices.
#
sub check_rbl_accreditor {
my ($self, $rule, $set, $rbl_server, $subtest, $accreditor) = @_;
if (!defined $self->{accreditor_tag}) {
$self->message_accreditor_tag();
}
if ($self->{accreditor_tag}->{$accreditor}) {
$self->check_rbl_backend($rule, $set, $rbl_server, 'A', $subtest);
}
return 0;
}
# Check for an Accreditor Assertion within the message, that is, the name of
# a third-party who will vouch for the sender's practices. The accreditor
# can be asserted in the EnvelopeFrom like this:
#
# listowner@a--accreditor.mail.example.com
#
# or in an 'Accreditor" Header field, like this:
#
# Accreditor: accreditor1, parm=value; accreditor2, parm-value
#
# This implementation supports multiple accreditors, but ignores any
# parameters in the header field.
#
sub message_accreditor_tag {
my ($self) = @_;
my %acctags;
if ($self->get('EnvelopeFrom:addr') =~ /[@.]a--([a-z0-9]{3,})\./i) {
(my $tag = $1) =~ tr/A-Z/a-z/;
$acctags{$tag} = -1;
}
my $accreditor_field = $self->get('Accreditor');
if (defined($accreditor_field)) {
my @accreditors = split(/,/, $accreditor_field);
foreach my $accreditor (@accreditors) {
my @terms = split(' ', $accreditor);
if ($#terms >= 0) {
my $tag = $terms[0];
$tag =~ tr/A-Z/a-z/;
$acctags{$tag} = -1;
}
}
}
$self->{accreditor_tag} = \%acctags;
}
###########################################################################
# BODY TESTS:
###########################################################################
sub body_charset_is_likely_to_fp {
my ($self) = @_;
# check for charsets where this test will FP -- iso-2022-jp, gb2312,
# koi8-r etc.
#
$self->_check_attachments unless exists $self->{mime_checked_attachments};
my @charsets = ();
my $type = $self->get('Content-Type');
$type = get_charset_from_ct_line ($type);
if (defined $type) {
push (@charsets, $type);
}
if (defined $self->{mime_html_charsets}) {
push (@charsets, split(' ', $self->{mime_html_charsets}));
}
foreach my $charset (@charsets) {
if ($charset =~ /^${CHARSETS_LIKELY_TO_FP_AS_CAPS}$/) {
return 1;
}
}
return 0;
}
sub check_for_uppercase {
my ($self, $body, $min, $max) = @_;
local ($_);
if (exists $self->{uppercase}) {
return ($self->{uppercase} > $min && $self->{uppercase} <= $max);
}
if ($self->body_charset_is_likely_to_fp()) {
$self->{uppercase} = 0; return 0;
}
# Dec 20 2002 jm: trade off some speed for low memory footprint, by
# iterating over the array computing sums, instead of joining the
# array into a giant string and working from that.
my $len = 0;
my $lower = 0;
my $upper = 0;
foreach (@{$body}) {
# examine lines in the body that have an intermediate space
next unless /\S\s+\S/;
# strip out lingering base64 (currently possible for forwarded messages)
next if /^(?:[A-Za-z0-9+\/=]{60,76} ){2}/;
my $line = $_; # copy so we don't muck up the original
# remove shift-JIS charset codes
$line =~ s/\x1b\$B.*\x1b\(B//gs;
$len += length($line);
# count numerals as lower case, otherwise 'date|mail' is spam
$lower += ($line =~ tr/a-z0-9//d);
$upper += ($line =~ tr/A-Z//);
}
# report only on mails above a minimum size; otherwise one
# or two acronyms can throw it off
if ($len < 200) {
$self->{uppercase} = 0;
return 0;
}
if (($upper + $lower) == 0) {
$self->{uppercase} = 0;
} else {
$self->{uppercase} = ($upper / ($upper + $lower)) * 100;
}
return ($self->{uppercase} > $min && $self->{uppercase} <= $max);
}
###########################################################################
# MIME/uuencode attachment tests
###########################################################################
# generic test version
sub check_for_mime {
my ($self, undef, $test) = @_;
$self->_check_attachments unless exists $self->{$test};
return $self->{$test};
}
# any text/html MIME part
sub check_for_mime_html {
my ($self) = @_;
my $ctype = $self->get('Content-Type');
return 1 if (defined($ctype) && $ctype =~ m@^text/html@i);
$self->_check_attachments unless exists $self->{mime_body_html_count};
return ($self->{mime_body_html_count} > 0);
}
# HTML without some other type of MIME text part
sub check_for_mime_html_only {
my ($self) = @_;
my $ctype = $self->get('Content-Type');
return 1 if (defined($ctype) && $ctype =~ m@^text/html@i);
$self->_check_attachments unless exists $self->{mime_body_html_count};
return ($self->{mime_body_html_count} > 0 &&
$self->{mime_body_text_count} == 0);
}
sub check_for_mime_excessive_qp {
my ($self, undef, $min) = @_;
$self->_check_attachments unless exists $self->{mime_qp_ratio};
return $self->{mime_qp_ratio} >= $min;
}
sub check_mime_multipart_ratio {
my ($self, undef, $min, $max) = @_;
$self->_check_attachments unless exists $self->{mime_multipart_alternative};
return ($self->{mime_multipart_ratio} >= $min &&
$self->{mime_multipart_ratio} < $max);
}
sub _check_mime_header {
my ($self, $ctype, $cte, $cd, $charset, $name) = @_;
$charset ||= '';
if ($ctype eq 'text/html') {
$self->{mime_body_html_count}++;
}
elsif ($ctype =~ m@^text@i) {
$self->{mime_body_text_count}++;
}
if ($cte =~ /base64/) {
$self->{mime_base64_count}++;
}
elsif ($cte =~ /quoted-printable/) {
$self->{mime_qp_count}++;
}
if ($cd && $cd =~ /attachment/) {
$self->{mime_attachment}++;
}
if ($ctype =~ /^text/ &&
$cte =~ /base64/ &&
$charset !~ /utf-8/ &&
!($cd && $cd =~ /^(?:attachment|inline)/))
{
$self->{mime_base64_encoded_text} = 1;
}
if ($cte =~ /base64/ && !$name) {
$self->{mime_base64_no_name} = 1;
}
if ($charset =~ /iso-\S+-\S+\b/i &&
$charset !~ /iso-(?:8859-\d{1,2}|2022-(?:jp|kr))\b/)
{
$self->{mime_bad_iso_charset} = 1;
}
# MIME_BASE64_LATIN: now a zero-hitter
# if (!$name &&
# $cte =~ /base64/ &&
# $charset =~ /\b(?:us-ascii|iso-8859-(?:[12349]|1[0345])|windows-(?:125[0247]))\b/)
# {
# $self->{mime_base64_latin} = 1;
# }
# MIME_QP_NO_CHARSET: now a zero-hitter
# if ($cte =~ /quoted-printable/ && $cd =~ /inline/ && !$charset) {
# $self->{mime_qp_inline_no_charset} = 1;
# }
# MIME_HTML_NO_CHARSET: now a zero-hitter
# if ($ctype eq 'text/html' &&
# !(defined($charset) && $charset) &&
# !($cd && $cd =~ /^(?:attachment|inline)/))
# {
# $self->{mime_html_no_charset} = 1;
# }
if ($charset =~ /[a-z]/i) {
if (defined $self->{mime_html_charsets}) {
$self->{mime_html_charsets} .= " ".$charset;
} else {
$self->{mime_html_charsets} = $charset;
}
if (! $self->{mime_faraway_charset}) {
my @l = $self->get_my_locales();
if (!(grep { $_ eq "all" } @l) &&
!Mail::SpamAssassin::Locales::is_charset_ok_for_locales($charset, @l))
{
$self->{mime_faraway_charset} = 1;
}
}
}
}
sub _check_attachments {
my ($self) = @_;
# MIME status
my $where = -1; # -1 = start, 0 = nowhere, 1 = header, 2 = body
my $qp_bytes = 0; # total bytes in QP regions
my $qp_count = 0; # QP-encoded bytes in QP regions
my @part_bytes; # MIME part total bytes
my @part_type; # MIME part types
# MIME header information
my $part = -1; # MIME part index
# indicate the scan has taken place
$self->{mime_checked_attachments} = 1;
# results
$self->{mime_base64_blanks} = 0;
$self->{mime_base64_count} = 0;
$self->{mime_base64_encoded_text} = 0;
# $self->{mime_base64_illegal} = 0;
# $self->{mime_base64_latin} = 0;
$self->{mime_base64_no_name} = 0;
$self->{mime_body_html_count} = 0;
$self->{mime_body_text_count} = 0;
$self->{mime_faraway_charset} = 0;
# $self->{mime_html_no_charset} = 0;
$self->{mime_missing_boundary} = 0;
$self->{mime_multipart_alternative} = 0;
$self->{mime_multipart_ratio} = 1.0;
$self->{mime_qp_count} = 0;
# $self->{mime_qp_illegal} = 0;
# $self->{mime_qp_inline_no_charset} = 0;
$self->{mime_qp_long_line} = 0;
$self->{mime_qp_ratio} = 0;
# Get all parts ...
foreach my $p ($self->{msg}->find_parts(qr/./)) {
# message headers
my ($ctype, $boundary, $charset, $name) = Mail::SpamAssassin::Util::parse_content_type($p->get_header("content-type"));
if ($ctype eq 'multipart/alternative') {
$self->{mime_multipart_alternative} = 1;
}
my $cte = $p->get_header('Content-Transfer-Encoding') || '';
chomp($cte = defined($cte) ? lc $cte : "");
my $cd = $p->get_header('Content-Disposition') || '';
chomp($cd = defined($cd) ? lc $cd : "");
$charset = lc $charset if ($charset);
$name = lc $name if ($name);
$self->_check_mime_header($ctype, $cte, $cd, $charset, $name);
# If we're not in a leaf node in the tree, there will be no raw
# section, so skip it.
if (! $p->is_leaf()) {
next;
}
$part++;
$part_type[$part] = $ctype;
$part_bytes[$part] = 0 if $cd !~ /attachment/;
my $previous = '';
foreach (@{$p->raw()}) {
if ($cte =~ /base64/i) {
if ($previous =~ /^\s*$/ && /^\s*$/) {
$self->{mime_base64_blanks} = 1;
}
# MIME_BASE64_ILLEGAL: now a zero-hitter
# if (m@[^A-Za-z0-9+/=\n]@ || /=[^=\s]/) {
# $self->{mime_base64_illegal} = 1;
# }
}
# if ($self->{mime_html_no_charset} && $ctype eq 'text/html' && defined $charset) {
# $self->{mime_html_no_charset} = 0;
# }
if ($self->{mime_multipart_alternative} && $cd !~ /attachment/ &&
($ctype eq 'text/plain' || $ctype eq 'text/html')) {
$part_bytes[$part] += length;
}
if ($where != 1 && $cte eq "quoted-printable" && ! /^SPAM: /) {
if (length > 77) {
$self->{mime_qp_long_line} = 1;
}
$qp_bytes += length;
# MIME_QP_DEFICIENT: zero-hitter now
# check for illegal substrings (RFC 2045), hexadecimal values 7F-FF and
# control characters other than TAB, or CR and LF as parts of CRLF pairs
# if (!$self->{mime_qp_illegal} && /[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\xff]/)
# {
# $self->{mime_qp_illegal} = 1;
# }
# count excessive QP bytes
if (index($_, '=') != -1) {
# whoever wrote this next line is an evil hacker -- jm
my $qp = () = m/=(?:09|3[0-9ABCEF]|[2456][0-9A-F]|7[0-9A-E])/g;
if ($qp) {
$qp_count += $qp;
# tabs and spaces at end of encoded line are okay. Also, multiple
# whitespace at the end of a line are OK, like ">=20=20=20=20=20=20".
my ($trailing) = m/((?:=09|=20)+)\s*$/g;
if ($trailing) {
$qp_count -= (length($trailing) / 3);
}
}
}
}
$previous = $_;
}
}
if ($qp_bytes) {
$self->{mime_qp_ratio} = $qp_count / $qp_bytes;
}
if ($self->{mime_multipart_alternative}) {
my $text;
my $html;
# bug 4207: we want the size of the last parts
for (my $i = $part; $i >= 0; $i--) {
next if !defined $part_bytes[$i];
if (!defined($html) && $part_type[$i] eq 'text/html') {
$html = $part_bytes[$i];
}
elsif (!defined($text) && $part_type[$i] eq 'text/plain') {
$text = $part_bytes[$i];
}
last if (defined($html) && defined($text));
}
if (defined($text) && defined($html) && $html > 0) {
$self->{mime_multipart_ratio} = ($text / $html);
}
}
# Look to see if any multipart boundaries are not "balanced"
foreach my $val (values %{$self->{msg}->{mime_boundary_state}}) {
if ($val != 0) {
$self->{mime_missing_boundary} = 1;
last;
}
}
}
###########################################################################
sub check_for_fake_aol_relay_in_rcvd {
my ($self) = @_;
local ($_);
$_ = $self->get('Received'); s/\s/ /gs;
# this is the hostname format used by AOL for their relays. Spammers love
# forging it. Don't make it more specific to match aol.com only, though --
# there's another set of spammers who generate fake hostnames to go with
# it!
if (/ rly-[a-z][a-z]\d\d\./i) {
return 0 if /\/AOL-\d+\.\d+\.\d+\)/; # via AOL mail relay
return 0 if /ESMTP id (?:RELAY|MAILRELAY|MAILIN)/; # AOLish
return 1;
}
# spam: Received: from unknown (HELO mta05bw.bigpond.com) (80.71.176.130) by
# rly-xw01.mx.aol.com with QMQP; Sat, 15 Jun 2002 23:37:16 -0000
# non: Received: from rly-xj02.mx.aol.com (rly-xj02.mail.aol.com [172.20.116.39]) by
# omr-r05.mx.aol.com (v83.35) with ESMTP id RELAYIN7-0501132011; Wed, 01
# May 2002 13:20:11 -0400
# non: Received: from logs-tr.proxy.aol.com (logs-tr.proxy.aol.com [152.163.201.132])
# by rly-ip01.mx.aol.com (8.8.8/8.8.8/AOL-5.0.0)
# with ESMTP id NAA08955 for <sapient-alumni@yahoogroups.com>;
# Thu, 4 Apr 2002 13:11:20 -0500 (EST)
return 0;
}
###########################################################################
sub check_for_to_in_subject {
my ($self, $test) = @_;
my $full_to = $self->get('To:addr');
return 0 unless $full_to;
my $subject = $self->get('Subject');
if ($test eq "address") {
return $subject =~ /\b\Q$full_to\E\b/i; # "user@domain.com"
}
elsif ($test eq "user") {
my $to = $full_to;
$to =~ s/\@.*//;
return $subject =~ /^\s*\Q$to\E,\s/i; # "user,\s" case insensitive
}
return 0;
}
###########################################################################
sub check_bayes {
my ($self, $fulltext, $min, $max) = @_;
return 0 if (!$self->{conf}->{use_bayes_rules});
if (!exists ($self->{bayes_score})) {
$self->{bayes_score} = $self->{main}->{bayes_scanner}->scan ($self, $self->{msg});
}
if (defined $self->{bayes_score} &&
($min == 0 || $self->{bayes_score} > $min) &&
($max eq "undef" || $self->{bayes_score} <= $max))
{
if ($self->{conf}->{detailed_bayes_score}) {
$self->test_log(sprintf ("score: %3.4f, hits: %s",
$self->{bayes_score},
$self->{bayes_hits}));
}
else {
$self->test_log(sprintf ("score: %3.4f", $self->{bayes_score}));
}
return 1;
}
return 0;
}
###########################################################################
sub check_outlook_message_id {
my ($self) = @_;
local ($_);
my $id = $self->get('MESSAGEID');
return 0 if $id !~ /^<[0-9a-f]{4}([0-9a-f]{8})\$[0-9a-f]{8}\$[0-9a-f]{8}\@/;
my $timetoken = hex($1);
my $x = 0.0023283064365387;
my $y = 27111902.8329849;
my $fudge = 250;
$_ = $self->get('Date');
$_ = Mail::SpamAssassin::Util::parse_rfc822_date($_) || 0;
my $expected = int (($_ * $x) + $y);
my $diff = $timetoken - $expected;
return 0 if (abs($diff) < $fudge);
$_ = $self->get('Received');
/(\s.?\d+ \S\S\S \d+ \d+:\d+:\d+ \S+).*?$/;
$_ = Mail::SpamAssassin::Util::parse_rfc822_date($_) || 0;
$expected = int(($_ * $x) + $y);
$diff = $timetoken - $expected;
return (abs($diff) >= $fudge);
}
sub check_messageid_not_usable {
my ($self) = @_;
local ($_);
# Lyris eats message-ids. also some ezmlm, I think :(
$_ = $self->get("List-Unsubscribe");
return 1 if (/<mailto:(?:leave-\S+|\S+-unsubscribe)\@\S+>$/);
# ezmlm again
if($self->gated_through_received_hdr_remover()) { return 1; }
# Allen notes this as 'Wacky sendmail version?'
$_ = $self->get("Received");
return 1 if /\/CWT\/DCE\)/;
# Apr 2 2003 jm: iPlanet rewrites lots of stuff, including Message-IDs
return 1 if /iPlanet Messaging Server/;
# too old; older versions of clients used different formats
return 1 if ($self->received_within_months('6','undef'));
return 0;
}
# Return true if the count of $hdr headers are within the given range
sub check_header_count_range {
my ($self, $hdr, $min, $max) = @_;
my %uniq = ();
my @hdrs = grep(!$uniq{$_}++, $self->{msg}->get_header ($hdr));
return (scalar @hdrs >= $min && scalar @hdrs <= $max);
}
sub check_blank_line_ratio {
my ($self, $fulltext, $min, $max, $minlines) = @_;
if (!defined $minlines || $minlines < 1) {
$minlines = 1;
}
if (! exists $self->{blank_line_ratio}->{$minlines}) {
$fulltext = $self->get_decoded_body_text_array();
my ($blank) = 0;
if (scalar @{$fulltext} >= $minlines) {
my $consecutive_blanks = 0;
foreach my $line (@{$fulltext}) {
if ($line =~ /\S/) {
$consecutive_blanks = 0;
next;
}
$consecutive_blanks++;
$blank++;
}
# ignore trailing blanks
$blank -= $consecutive_blanks;
my $total_lines = scalar @{$fulltext} - $consecutive_blanks;
$self->{blank_line_ratio}->{$minlines} = $total_lines == 0 ? -1 :
(100 * $blank / $total_lines);
}
else {
$self->{blank_line_ratio}->{$minlines} = -1; # don't report if it's a blank message ...
}
}
return (($min == 0 && $self->{blank_line_ratio}->{$minlines} <= $max) ||
($self->{blank_line_ratio}->{$minlines} > $min &&
$self->{blank_line_ratio}->{$minlines} <= $max));
}
sub sent_by_applemail {
my ($self) = @_;
return 0 unless ($self->get("MIME-Version") =~ /Apple Message framework/);
return 0 unless ($self->get("X-Mailer") =~ /^Apple Mail \(\d+\.\d+\)/);
return 0 unless ($self->get("Message-Id") =~
/^<[A-F0-9]+(?:-[A-F0-9]+){4}\@\S+.\S+>$/);
return 1;
}
sub check_for_rdns_helo_mismatch { # T_FAKE_HELO_*
my ($self, $rdns, $helo) = @_;
# oh for ghod's sake. Apple's Mail.app HELO's as the right-hand
# side of the From address. So "HELO jmason.org" in my case.
# This is (obviously) considered forgery, since it's exactly
# what ratware does too.
return 0 if $self->sent_by_applemail();
# the IETF's list-management system mangles Received headers,
# "faking" a HELO, resulting in FPs. So if we received the
# mail from the IETF's outgoing SMTP server, skip it.
if ($self->{relays_untrusted_str} =~ /^\[ [^\]]*
ip=132\.151\.1\.\S+\s+ rdns=\S*ietf\.org /x)
{
return 0;
}
my $firstuntrusted = 1;
foreach my $relay (@{$self->{relays_untrusted}}) {
my $wasfirst = $firstuntrusted;
$firstuntrusted = 0;
# did the machine HELO as a \S*something\.com machine?
if ($relay->{helo} !~ /(?:\.|^)${helo}$/) { next; }
my $claimed = $relay->{rdns};
my $claimedmatches = ($claimed =~ /(?:\.|^)${rdns}$/);
if ($claimedmatches && $wasfirst) {
# the first untrusted Received: hdr is inserted by a trusted MTA.
# so if the rDNS pattern matches, we're good, skip it
next;
}
if ($claimedmatches && !$wasfirst) {
# it's a possibly-forged rDNS lookup. Do a verification lookup
# to ensure the host really does match what the rDNS lookup
# claims it is.
if ($self->is_dns_available()) {
my $vrdns = $self->lookup_ptr ($relay->{ip});
if (defined $vrdns && $vrdns ne $claimed) {
dbg("eval: rdns/helo mismatch: helo=$relay->{helo} ".
"claimed-rdns=$claimed true-rdns=$vrdns");
return 1;
# TODO: instead, we should set a flag and check it later for
# another test; but that relies on complicated test ordering
}
}
}
if (!$claimedmatches) {
if (!$self->is_dns_available()) {
if ($relay->{rdns_not_in_headers}) {
# that's OK then; it's just the MTA which picked it up,
# is not configured to perform lookups, and we're offline
# so we couldn't either.
return 0;
}
}
# otherwise there *is* a mismatch
dbg("eval: rdns/helo mismatch: helo=$relay->{helo} rdns=$claimed");
return 1;
}
}
0;
}
# note using IPv4 addresses for now due to empty strings matching IP_ADDRESS
# due to bug in pure IPv6 address regular expression
sub helo_ip_mismatch {
my ($self) = @_;
my $IP_ADDRESS = IPV4_ADDRESS;
my $IP_PRIVATE = IP_PRIVATE;
for my $relay (@{$self->{relays_untrusted}}) {
# is HELO usable?
next unless ($relay->{helo} =~ m/^$IP_ADDRESS$/ &&
$relay->{helo} !~ /$IP_PRIVATE/);
# compare HELO with IP
return 1 if ($relay->{ip} =~ m/^$IP_ADDRESS$/ &&
$relay->{ip} !~ m/$IP_PRIVATE/ &&
$relay->{helo} ne $relay->{ip} &&
# different IP is okay if in same /24
$relay->{helo} =~ /^(\d+\.\d+\.\d+\.)/ &&
index($relay->{ip}, $1) != 0);
}
0;
}
###########################################################################
sub check_all_trusted {
my ($self) = @_;
return $self->{num_relays_trusted}
&& !$self->{num_relays_untrusted}
&& !$self->{num_relays_unparseable};
}
sub check_no_relays {
my ($self) = @_;
return !$self->{num_relays_trusted}
&& !$self->{num_relays_untrusted}
&& !$self->{num_relays_unparseable};
}
sub check_relays_unparseable {
my ($self) = @_;
return $self->{num_relays_unparseable};
}
###########################################################################
# HTML parser tests
###########################################################################
sub html_tag_balance {
my ($self, undef, $rawtag, $rawexpr) = @_;
$rawtag =~ /^([a-zA-Z0-9]+)$/; my $tag = $1;
$rawexpr =~ /^([\<\>\=\!\-\+ 0-9]+)$/; my $expr = $1;
return 0 unless exists $self->{html}{inside}{$tag};
$self->{html}{inside}{$tag} =~ /^([\<\>\=\!\-\+ 0-9]+)$/;
my $val = $1;
return eval "\$val $expr";
}
sub html_image_only {
my ($self, undef, $min, $max) = @_;
return (exists $self->{html}{inside}{img} &&
exists $self->{html}{length} &&
$self->{html}{length} > $min &&
$self->{html}{length} <= $max);
}
sub html_image_ratio {
my ($self, undef, $min, $max) = @_;
return 0 unless (exists $self->{html}{non_space_len} &&
exists $self->{html}{image_area} &&
$self->{html}{image_area} > 0);
my $ratio = $self->{html}{non_space_len} / $self->{html}{image_area};
return ($ratio > $min && $ratio <= $max);
}
sub html_charset_faraway {
my ($self) = @_;
return 0 unless exists $self->{html}{charsets};
my @locales = $self->get_my_locales();
return 0 if grep { $_ eq "all" } @locales;
my $okay = 0;
my $bad = 0;
for my $c (split(' ', $self->{html}{charsets})) {
if (Mail::SpamAssassin::Locales::is_charset_ok_for_locales($c, @locales)) {
$okay++;
}
else {
$bad++;
}
}
return ($bad && ($bad >= $okay));
}
sub html_tag_exists {
my ($self, undef, $tag) = @_;
return exists $self->{html}{inside}{$tag};
}
sub html_test {
my ($self, undef, $test) = @_;
return $self->{html}{$test};
}
sub html_eval {
my ($self, undef, $test, $rawexpr) = @_;
$rawexpr =~ /^([\<\>\=\!\-\+ 0-9]+)$/; my $expr = $1;
# workaround bug 3320: wierd perl bug where additional, very explicit
# untainting into a new var is required.
my $tainted = $self->{html}{$test};
return unless defined($tainted);
$tainted =~ /^(.*)$/; my $val = $1;
# just use the value in $val, don't copy it needlessly
return eval "\$val $expr";
}
sub html_text_match {
my ($self, undef, $text, $regexp) = @_;
for my $string (@{ $self->{html}{$text} }) {
if (defined $string && $string =~ /${regexp}/) {
return 1;
}
}
return 0;
}
sub html_title_subject_ratio {
my ($self, undef, $ratio) = @_;
my $subject = $self->get('Subject');
if (! $subject) {
return 0;
}
my $max = 0;
for my $string (@{ $self->{html}{title} }) {
if ($string) {
my $ratio = length($string) / length($subject);
$max = $ratio if $ratio > $max;
}
}
return $max > $ratio;
}
sub html_text_not_match {
my ($self, undef, $text, $regexp) = @_;
for my $string (@{ $self->{html}{$text} }) {
if (defined $string && $string !~ /${regexp}/) {
return 1;
}
}
return 0;
}
sub html_range {
my ($self, undef, $test, $min, $max) = @_;
return 0 unless exists $self->{html}{$test};
$test = $self->{html}{$test};
# not all perls understand what "inf" means, so we need to do
# non-numeric tests! urg!
if (!defined $max || $max eq "inf") {
return ($test eq "inf") ? 1 : ($test > $min);
}
elsif ($test eq "inf") {
# $max < inf, so $test == inf means $test > $max
return 0;
}
else {
# if we get here everything should be a number
return ($test > $min && $test <= $max);
}
}
###########################################################################
sub multipart_alternative_difference {
my ($self, $fulltext, $min, $max) = @_;
$self->_multipart_alternative_difference() unless (exists $self->{madiff});
if (($min == 0 || $self->{madiff} > $min) &&
($max eq "undef" || $self->{madiff} <= $max)) {
return 1;
}
return 0;
}
sub multipart_alternative_difference_count {
my ($self, $fulltext, $ratio, $minhtml) = @_;
$self->_multipart_alternative_difference() unless (exists $self->{madiff});
return 0 unless $self->{madiff_html} > $minhtml;
return(($self->{madiff_text} / $self->{madiff_html}) > $ratio);
}
sub _multipart_alternative_difference {
my ($self) = @_;
$self->{madiff} = 0;
$self->{madiff_html} = 0;
$self->{madiff_text} = 0;
# Find all multipart/alternative parts in the message
my @ma = $self->{msg}->find_parts(qr@^multipart/alternative\b@i);
# If there are no multipart/alternative sections, skip this test.
return if (!@ma);
# Figure out what the MIME content of the message looks like
my @content = $self->{msg}->content_summary();
# Exchange meeting requests come in as m/a text/html text/calendar,
# which we want to ignore because of the high FP rate it would cause.
#
if (@content == 3 && $content[2] eq 'text/calendar' &&
$content[1] eq 'text/html' &&
$content[0] eq 'multipart/alternative') {
return;
}
# Go through each of the multipart parts
foreach my $part (@ma) {
my %html = ();
my %text = ();
# limit our search to text-based parts
my @txt = $part->find_parts(qr@^text\b@i);
foreach my $text (@txt) {
# we only care about the rendered version of the part
my ($type, $rnd) = $text->rendered();
# parse the rendered text into tokens. assume they are whitespace
# separated, and ignore anything that doesn't have a word-character
# in it (0-9a-zA-Z_) since those are probably things like bullet
# points, horizontal lines, etc. this assumes that punctuation
# in one part will be the same in other parts.
#
if ($type eq 'text/html') {
foreach my $w (grep(/\w/, split(/\s+/, $rnd))) {
#dbg("eval: HTML: $w");
$html{$w}++;
}
# If there are no words, mark if there's at least 1 image ...
if (keys %html == 0 && exists $self->{html}{inside}{img}) {
# Use "\n" as the mark since it can't ever occur normally
$html{"\n"}=1;
}
}
else {
foreach my $w (grep(/\w/, split(/\s+/, $rnd))) {
#dbg("eval: TEXT: $w");
$text{$w}++;
}
}
}
# How many HTML tokens do we have at the start?
my $orig = keys %html;
next if ($orig == 0);
$self->{madiff_html} = $orig;
$self->{madiff_text} = keys %text;
dbg('eval: text words: ' . $self->{madiff_text} . ', html words: ' . $self->{madiff_html});
# If the token appears at least as many times in the text part as
# in the html part, remove it from the list of html tokens.
while(my ($k,$v) = each %text) {
delete $html{$k} if (exists $html{$k} && $html{$k}-$text{$k} < 1);
}
#map { dbg("eval: LEFT: $_") } keys %html;
# In theory, the tokens should be the same in both text and html
# parts, so there would be 0 tokens left in the html token list, for
# a 0% difference rate. Calculate it here, and record the difference
# if it's been the highest so far in this message.
my $diff = scalar(keys %html)/$orig*100;
$self->{madiff} = $diff if ($diff > $self->{madiff});
dbg("eval: " . sprintf "madiff: left: %d, orig: %d, max-difference: %0.2f%%", scalar(keys %html), $orig, $self->{madiff});
}
return;
}
###########################################################################
sub check_domain_ratio {
my ($self, $body, $ratio) = @_;
my $length = (length(join('', @{$body})) || 1);
if (!defined $self->{uri_domain_count}) {
$self->get_uri_list();
}
return 0 if !defined $self->{uri_domain_count};
return (($self->{uri_domain_count} / $length) > $ratio);
}
###########################################################################
sub check_for_http_redirector {
my ($self) = @_;
foreach ($self->get_uri_list()) {
while (s{^https?://([^/:\?]+).+?(https?:/{0,2}?([^/:\?]+).*)$}{$2}) {
my ($redir, $dest) = ($1, $3);
foreach ($redir, $dest) {
$_ = Mail::SpamAssassin::Util::uri_to_domain(lc($_)) || $_;
}
next if ($redir eq $dest);
dbg("eval: redirect: found $redir to $dest, flagging");
return 1;
}
}
return 0;
}
###########################################################################
sub check_for_numeric_helo {
my ($self) = @_;
my $rcvd = $self->{relays_untrusted_str};
if ($rcvd) {
my $IP_ADDRESS = IPV4_ADDRESS;
my $IP_PRIVATE = IP_PRIVATE;
if ($rcvd =~ /helo=($IP_ADDRESS)\b/i && $1 !~ /$IP_PRIVATE/) {
return 1;
}
}
return 0;
}
sub check_for_illegal_ip {
my ($self) = @_;
foreach my $rcvd ( @{$self->{relays_untrusted}} ) {
# (note this might miss some hits if the Received.pm skips any invalid IPs)
foreach my $check ( $rcvd->{ip}, $rcvd->{by} ) {
next if ($check eq '127.0.0.1');
return 1 if ($check =~ /^
(?:[01257]|127|22[3-9]|23[0-9]|24[0-9]|25[0-5])\.\d+\.\d+\.\d+
$/x);
}
}
return 0;
}
###########################################################################
sub check_msg_parse_flags {
my($self, $type, $type2) = @_;
$type = $type2 if ref($type);
return defined $self->{msg}->{$type};
}
###########################################################################
sub check_unresolved_template {
my ($self) = @_;
my $all = $self->get('ALL'); # cached access
$all =~ s/\n[ \t]+/ /gs; # fix continuation lines
for my $header (split(/\n/, $all)) {
# slightly faster to test in this order
if ($header =~ /%[A-Z][A-Z_-]/ &&
$header !~ /^(?:X-UIDL|X-Face|To|Cc|From|Subject|References|In-Reply-To|(?:X-|Resent-|X-Original-)?Message-Id):/i)
{
return 1;
}
}
return 0;
}
###########################################################################
sub check_ratware_name_id {
my ($self) = @_;
my $mid = $self->get('MESSAGEID');
my $from = $self->get('From');
if ($mid =~ m/<[A-Z]{28}\.([^>]+?)>/) {
if ($from =~ m/\"[^\"]+\"\s*<\Q$1\E>/) {
return 1;
}
}
return 0;
}
sub check_https_ip_mismatch {
my ($self) = @_;
while (my($k,$v) = each %{$self->{html}->{uri_detail}}) {
next if ($k !~ m%^https?:/*(?:[^\@/]+\@)?\d+\.\d+\.\d+\.\d+%i);
foreach (@{$v->{anchor_text}}) {
next if (m%^https:/*(?:[^\@/]+\@)?\d+\.\d+\.\d+\.\d+%i);
if (m%https:%i) {
keys %{$self->{html}->{uri_detail}}; # resets iterator, bug 4829
return 1;
}
}
}
return 0;
}
sub check_iframe_src {
my ($self) = @_;
foreach my $v ( values %{$self->{html}->{uri_detail}} ) {
return 1 if $v->{types}->{iframe};
}
return 0;
}
sub check_ratware_envelope_from {
my ($self) = @_;
my $to = $self->get('To:addr');
my $from = $self->get('EnvelopeFrom');
return 0 unless ($to && $from);
return 0 if ($from =~ /^SRS\d=/);
if ($to =~ /^([^@]+)@(.+)$/) {
my($user,$dom) = ($1,$2);
$dom = Mail::SpamAssassin::Util::RegistrarBoundaries::trim_domain($dom);
return unless
(Mail::SpamAssassin::Util::RegistrarBoundaries::is_domain_valid($dom));
return 1 if ($from =~ /\b\Q$dom\E.\Q$user\E@/i);
}
return 0;
}
# came up on the users@ list, look for multipart/alternative parts which
# include non-text parts -- skip certain types which occur normally in ham
sub check_ma_non_text {
my $self = shift;
foreach my $map ($self->{msg}->find_parts(qr@^multipart/alternative$@i)) {
foreach my $p ($map->find_parts(qr/./, 1, 0)) {
next if (lc $p->{'type'} eq 'multipart/related');
next if (lc $p->{'type'} eq 'application/rtf');
next if ($p->{'type'} =~ m@^text/@i);
return 1;
}
}
return 0;
}
1;