describe URI_REDIRECTOR Message has HTTP redirector URI
# Hostname with a "com" label followed by further labels, e.g. a.com.b.c.d
# The pattern requires a "." after each of the two labels that follow "com.",
# so "a.com.b.c" only matches when something (even a bare trailing dot)
# follows "c".  Labels are matched with \w+, so labels containing "-" do not
# match.
uri SPOOF_COM2OTH m{^https?://(?:\w+\.)+?com\.(?:\w+\.){2}}i
describe SPOOF_COM2OTH URI contains ".com" in middle
# Hostname with a "com" label in the middle and a later label starting with
# "com", e.g. a.com.b.com
# There is no boundary after the final "com", so a later label that merely
# begins with "com" also satisfies the pattern.  Labels are matched with \w+,
# so labels containing "-" do not match.
uri SPOOF_COM2COM m{^https?://(?:\w+\.)+?com\.(?:\w+\.)+?com}i
describe SPOOF_COM2COM URI contains ".com" in middle and end
# Hostname with a "net" or "org" label in the middle and a later label
# starting with "com", e.g. a.net.b.com or a.org.b.com
# There is no boundary after the final "com".  Labels are matched with \w+,
# so labels containing "-" do not match.
uri SPOOF_NET2COM m{^https?://(?:\w+\.)+?(?:net|org)\.(?:\w+\.)+?com}i
describe SPOOF_NET2COM URI contains ".net" or ".org", then ".com"
# Hostname in which a TLD-like label (com, net, org, biz, info, edu) or "www"
# appears after at least two leading labels and is itself followed by at
# least two more labels, optionally ending in a port introduced by ":" or
# its percent-encoded form "%3a".
# CDNs (Akamai (edgesuite), Speedera, and NYUD, so far) do this, so skip them:
# the negative lookahead rejects hosts where that label is followed by
# ".edgesuite.net", ".nyud.net" or ".speedera.net", with at most one extra
# label in between.
uri SPOOF_OURI m{^https?://(?:[a-z0-9_-]+?\.){2,}(?:com|net|org|biz|info|edu|www)(?!\.(?:\w+\.)?(?:edgesuite|nyud|speedera)\.net)(?:\.[a-z0-9_%-]+?){2,}(?:(?::|%3a)\d+)?}i
describe SPOOF_OURI URI has items in odd places
# A run of six or more digits, delimited by word boundaries, occurring after
# "http://" or "https://" and before the first "/" or "?".
# The [^/?]* prefix covers everything up to that first "/" or "?", so a
# userinfo or port component, if present, is searched as well.
uri URI_DIGITS m%^https?://[^/?]*\b\d{6,}\b%i
describe URI_DIGITS URI hostname has long digit sequence
# A run of six or more hexadecimal characters (0-9, a-f, case-insensitive),
# delimited by word boundaries, occurring after "http://" or "https://" and
# before the first "/" or "?".
# All-digit runs and ordinary words made up only of the letters a-f also
# satisfy the character class.
uri URI_HEX m%^https?://[^/?]*\b[0-9a-f]{6,}\b%i
describe URI_HEX URI hostname has long hexadecimal sequence
# Seven consecutive consonants (case-insensitive) occurring after "http://"
# or "https://" and before the first "/" or "?".
# The character class omits a, e, i, o, u and also y, so "y" breaks a run.
uri URI_NOVOWEL m%^https?://[^/?]*[bcdfghjklmnpqrstvwxz]{7}%i
describe URI_NOVOWEL URI hostname has long non-vowel sequence
# Matches "gone.php", "opened.php" or "out.php" (case-insensitive) anywhere in
# the URI, provided the name starts at a word boundary.
uri URI_UNSUBSCRIBE /\b(?:gone|opened|out)\.php/i
describe URI_UNSUBSCRIBE URI contains suspicious unsubscribe link
# Two or more consecutive hostname labels that each consist of one uppercase
# letter followed by one or more lowercase letters, then one label of
# letters/digits/hyphens and a 2-4 letter lowercase TLD, ending at "?", "/"
# or the end of the URI.
# Only the scheme is case-insensitive (via the inline (?i:...) group); the
# rest of the pattern is case-sensitive on purpose, and it is not anchored
# to the start of the URI.
uri URI_UPPER_LOWER m{(?i:https?)://([A-Z][a-z]+\.){2,}[A-Za-z\d-]+\.[a-z]{2,4}(?:[?/]|$)}
describe URI_UPPER_LOWER URI contains capitalized hostname parts ("Abcde")
# bug 3896: URIs in various TLDs, other than 3rd level www
# Optional http(s) scheme, then a host of the form <prefix>.<label>.info/
# where <label> is at least 7 non-dot characters and <prefix> is not exactly
# "www" directly after a "/" (negative lookbehind).  What follows the "/"
# must be at least 15 non-whitespace characters and contain a "?".
uri URI_NO_WWW_INFO_CGI /^(?:https?:\/\/)?[^\/]+(?<!\/www)\.[^.]{7,}\.info\/(?=\S{15,})\S*\?/i
describe URI_NO_WWW_INFO_CGI CGI in .info TLD other than third-level "www"
# Optional http(s) scheme, then a host of the form <prefix>.<label>.biz/
# where <label> is at least 7 non-dot characters and <prefix> is not exactly
# "www" directly after a "/" (negative lookbehind).  What follows the "/"
# must be at least 15 non-whitespace characters and contain a "?".
uri URI_NO_WWW_BIZ_CGI /^(?:https?:\/\/)?[^\/]+(?<!\/www)\.[^.]{7,}\.biz\/(?=\S{15,})\S*\?/i
describe URI_NO_WWW_BIZ_CGI CGI in .biz TLD other than third-level "www"
# Optional http(s) scheme, then a host of the form
# <prefix>.<label>.<label>.<tld>/ where each <label> is at least 3 non-dot
# characters, <tld> is 2-4 letters not starting with "biz" or "info", and
# <prefix> is not exactly "www" directly after a "/" (negative lookbehind).
# The path must be a bare "/" immediately followed by "?" and at least 14
# non-whitespace characters.
# The description is kept within the conventional 50-character limit.
uri URI_NO_WWW_ANY_CGI /^(?:https?:\/\/)?[^\/]+(?<!\/www)\.[^.]{3,}\.[^.]{3,}\.(?!biz|info)[a-z]{2,4}\/\?\S{14}/i
describe URI_NO_WWW_ANY_CGI CGI with long hostname other than 4th-level "www"
# Fires when the URI scheme (3-6 ASCII letters followed by ":/") mixes upper
# and lower case, e.g. "Http:/" or "HTTp:/".  The negative lookahead skips
# schemes that are entirely lowercase or entirely uppercase.  Both
# alternatives must end in ":" so the whole scheme is tested; without it on
# the uppercase branch, any scheme that merely starts with three uppercase
# letters (such as "HTTp:") would be skipped as if it were all-uppercase.
uri URI_SCHEME_MIXED_CASE /^(?![a-z]{3,6}:|[A-Z]{3,6}:)[A-Za-z]{3,6}:\//
describe URI_SCHEME_MIXED_CASE URI scheme has mixed uppercase and lowercase
# bug 678
# After an "@" or "." and up to 20 non-whitespace characters, matches either
# a non-digit followed by "4" or "2" and then "u" or "you" (e.g. "x4u",
# "x2you"), or "for" + optional hyphens + "you"; optionally followed by a
# "." or "-" separated suffix of 1-20 non-whitespace characters, and ending
# in .net, .com, .org or .info.
# The pattern has no /i flag, so it is case-sensitive.
uri DOMAIN_4U2 /[\@\.]\S{0,20}(?:[^0-9][42](?:yo)?u|for-*you)(?:[.-]\S{1,20})?\.(?:net|com|org|info)\b/
describe DOMAIN_4U2 Domain name containing a "4u" variant
# Possible IDN spoofing attack: http://www.shmoo.com/idn/homograph.txt
# Not expecting any hits on this (yet).
# Matches a decimal HTML character reference ("&#NNN;") located before the
# first "/" after the http(s) scheme, where the number is either three digits
# beginning with 3-9 or four or more digits.  The "#" is backslash-escaped
# so that it is not taken as the start of a comment.
uri HIGH_CODEPAGE_URI /^https?:\/\/[^\/]*\&\#(?:\d{4,}|[3456789]\d\d);/i