home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
rtsi.com
/
2014.01.www.rtsi.com.tar
/
www.rtsi.com
/
OS9
/
FAQ
/
cgi-bin
/
discus4_00
/
source
/
profane.pl
< prev
next >
Wrap
Text File
|
2009-11-06
|
8KB
|
266 lines
# FILE: profane.pl
# DESCRIPTION: Profanity filtering of various kinds
#-------------------------------------------------------------------------------
# DISCUS COPYRIGHT NOTICE
#
# Discus is copyright (c) 2002 by DiscusWare, LLC, all rights reserved.
# The use of Discus is governed by the Discus License Agreement which is
# available from the Discus WWW site at:
# http://www.discusware.com/discus/license
#
# Pursuant to the Discus License Agreement, this copyright notice may not be
# removed or altered in any way.
#-------------------------------------------------------------------------------
use strict;
use vars qw($GLOBAL_OPTIONS $PARAMS $DCONF);
###
### profanity_filter
###
### Controls checking of input for profane words
###
sub profanity_filter {
my ($text_in, $argument) = @_;
my $args = {};
$args->{force_check} = $argument->{force_check} == 0 ? 0 : 1;
return $text_in if $GLOBAL_OPTIONS->{profanity} == 0 && ! $args->{force_check};
return $text_in if $text_in !~ /\S/;
$args->{force_read} = $argument->{force_read} eq "0" ? 0 : 1;
$args->{error_report} = defined $argument->{error_report} ? $argument->{error_report} : 2;
$args->{error_report} = 2 if ($GLOBAL_OPTIONS->{profanity_detected} == 2 && $argument->{error_report} == 1);
$args->{alternate_handler} = $argument->{alternate_handler} if defined $argument->{alternate_handler};
my $result = profanity_filter_internal($text_in, $args, $argument);
return $result if $args->{error_report} != 2;
if (keys(%{ $result->{cuss_found} })) {
if (! defined $args->{alternate_handler}) {
my $error_message = read_language()->{PROFANITY_DETECTED_MESSAGE};
my $l = join("<li>", sort keys(%{ $result->{cuss_found} })); $l = "<li>$l";
$error_message =~ s/\%wordlist/$l/g;
error_message(read_language()->{GENERAL_PROFANITY_ERROR}, $error_message, 0, 1);
} else {
&{ $args->{alternate_handler} }($text_in, $argument, $result->{cuss_found});
}
}
return $text_in;
}
###
### create_regular_expression
###
### Creates a regular expression for a word based on a character map
###
sub create_regular_expression {
my ($word, $charmap) = @_;
my $regexp = "";
my @letters = split(//, $word);
my $i = -1;
foreach my $letter (@letters) {
next if $letter eq chr(92);
$i++;
if ($letter =~ /\s/) {
$regexp .= $letter;
next;
}
my $L = defined $charmap->{$letter} ? $charmap->{$letter} : quotemeta($letter);
next if $L eq "";
$L = join($L, "[", "]") if length($charmap->{$letter}) > 1;
if ($GLOBAL_OPTIONS->{profanity_middle} && $i < $#letters && $i != 0) {
$L = join($L, "(?:", "|[^\\w\\s]+)");
}
if ($GLOBAL_OPTIONS->{profanity_repeated}) {
$L .= "+";
}
if ($GLOBAL_OPTIONS->{profanity_spaces} && $i < $#letters) {
$L .= "[_\\W]*";
}
$regexp .= $L;
}
return $regexp;
}
###
### profanity_filter_internal
###
### Checks a given input for profanity. Replaces naughty words with dots,
### generates an error, or returns a list of the bad words, as requested.
###
sub profanity_filter_internal {
my ($message_in, $arg, $argument) = @_;
my $result = {};
if ($GLOBAL_OPTIONS->{'profanity'} + $arg->{force_check} == 0) {
return $message_in;
}
if ($arg->{force_read} || ! defined $PARAMS->{cuss_expressions}) {
my $cuss = defined $arg->{cuss_in} ? $arg->{cuss_in} : read_cuss_file($arg->{force_read});
my @cuss = ();
my $charmap = {};
foreach my $letter ("a" .. "z") {
$charmap->{$letter} = $letter;
}
if ($GLOBAL_OPTIONS->{profanity_chars}) {
$charmap->{a} =~ s/a/a└┴┬├─┼αßΓπΣσ/gi;
$charmap->{b} =~ s/b/b■▀▐/gi;
$charmap->{c} =~ s/c/c╟τ/gi;
$charmap->{d} =~ s/d/d╨/gi;
$charmap->{e} =~ s/e/e╚╔╩╦ΦΘΩδ/gi;
$charmap->{i} =~ s/i/i╠═╬╧∞φε∩1!/gi;
$charmap->{o} =~ s/o/o╥╙╘╒╓╪≡≥≤⌠⌡÷°/gi;
$charmap->{u} =~ s/u/u┘┌█▄∙·√ⁿ/gi;
$charmap->{y} =~ s/y/y▌² /gi;
}
if ($GLOBAL_OPTIONS->{profanity_symbols}) {
$charmap->{a} =~ s/a/a\@/gi;
$charmap->{i} =~ s/i/i1\!/gi;
$charmap->{l} =~ s/l/l1\!/gi;
$charmap->{o} =~ s/o/o0/gi;
$charmap->{s} =~ s/s/s\\\$5/gi;
}
foreach my $word (@{ $cuss }) {
my $wordsave = $word;
$wordsave =~ s/\(\\S\*\)/\r/g;
$wordsave = create_regular_expression($wordsave, $charmap);
$wordsave =~ s/\r/\\S\*/g;
$wordsave =~ s/\\\\\\(\W)/\\$1/g;
push @cuss, { word => $word, wordrexp => $wordsave };
}
$PARAMS->{cuss_expressions} = \@cuss;
}
my $star_flag = ($GLOBAL_OPTIONS->{profanity_star} eq "" ? '\red{\char{149}}' : $GLOBAL_OPTIONS->{profanity_star});
my $within_html_star_flag = "*";
my @cuss = @{$PARAMS->{cuss_expressions}};
# Prevent tricks like w\b{or}d getting around filtering for 'word'
my $mtemp = $message_in;
$mtemp =~ s/\\char\{(\d+)\}/quotemeta(chr($1))/gie;
$mtemp =~ s/\\(\w+)\{(.*?)\}/$2/g;
$mtemp =~ s/<img[^>]*alt="(.*?)">/\[ $1 \]/gi;
my $cusscount = 0;
my $cusswords = {};
foreach my $word (@cuss) {
my $wordsave = $word->{wordrexp};
if ($arg->{error_report} == 1 || $arg->{globalcheck} == 1) {
while ($mtemp =~ /(^|\W)($wordsave)(\W|$)/ig) {
$cusswords->{$2} = 1; $cusscount++;
}
} else {
if ($mtemp =~ /(^|\W)($wordsave)(\W|$)/i) {
$cusswords->{$2} = 1; $cusscount++;
}
}
}
# Scan the actual message as it was entered
$message_in =~ s/\r\n/\n/g;
$message_in =~ s/\r/\n/g;
my @u = split(/<.*?>/, $message_in);
my @v = (); while ($message_in =~ /<.*?>/g) { push @v, $&; }
my $cusscount_cmp = 0;
my $W = $GLOBAL_OPTIONS->{profanity_international} == 1 ? "[\0-/:-\?\[-`\{-\┐]" : '\W';
foreach my $u (@u) {
study $u;
foreach my $word (@cuss) {
my $wordsave = $word->{wordrexp};
W: while ($u =~ /(^|$W)($wordsave)($W|$)/i) {
$cusscount_cmp++;
last W if $cusscount_cmp > 100;
if ($arg->{error_report} == 1) {
$u = join("", $`, $1, "\r" x length($2), $3, $');
} elsif ($arg->{error_report} == 2) {
$result->{cuss_found}->{$2} = 1;
last W;
} elsif ($arg->{error_report} == 3) {
return $word->{word};
} else {
last W;
}
}
}
$u =~ s/\r/$star_flag/g;
}
foreach my $v (@v) {
next if $v !~ m|<img([^>]+)alt="([^">]+)">|i;
my ($vbef, $vbtwn, $v2, $vaft) = ($`, $1, $2, $');
foreach my $word (@cuss) {
my $wordsave = $word->{wordrexp};
W: while ($v2 =~ /(^|$W)($wordsave)($W|$)/i) {
$cusscount_cmp++;
if ($arg->{error_report} == 1) {
$v2 = join("", $`, $1, "\r" x length($2), $3, $');
} elsif ($arg->{error_report} == 2) {
$result->{cuss_found}->{$2} = 1;
last W;
} elsif ($arg->{error_report} == 3) {
return $word->{word};
} else {
last W;
}
}
$v2 =~ s/\r/$within_html_star_flag/g;
}
$v = join("", $vbef, "<img", $vbtwn, 'alt="', $v2, '">', $vaft);
}
# Compare tricks vs. actual scan
if ($arg->{error_report} == 1) {
if ($cusscount > $cusscount_cmp) {
if (! defined $arg->{alternate_handler}) {
my $error_message = read_language()->{PROFANITY_DETECTED_MESSAGE};
my $l = join("<li>", sort keys(%{ $cusswords })); $l = "<li>$l";
$error_message =~ s/\%wordlist/$l/g;
error_message(read_language()->{GENERAL_PROFANITY_ERROR}, $error_message, 0, 1);
} else {
&{ $arg->{alternate_handler} }($message_in, $argument, $cusswords);
}
}
my $screened = "";
while (scalar(@u) + scalar(@v)) {
$screened .= shift @u;
$screened .= shift @v;
}
return $screened;
} elsif ($arg->{error_report} == 2) {
$result->{cuss_found} = $cusswords;
$result->{screened_message} = $message_in;
} elsif ($arg->{error_report} == 3) {
return undef;
}
return $result;
}
###
### read_cuss_file
###
### Reads in the list of cuss words
###
sub read_cuss_file {
my ($force) = @_;
return $PARAMS->{'naughty_words'} if defined $PARAMS->{'naughty_words'} && ! $force;
my $y = readfile("$DCONF->{admin_dir}/cuss.txt", "naughty_words", { zero_ok => 1, no_lock => 1, no_unlock => 1 });
my @z = ();
foreach my $line (@{ $y }) {
chomp $line;
if ($line =~ m|^\!|) {
$line = $';
$line =~ tr/a-zA-Z/b-zaB-ZA/;
push @z, $line;
} else {
push @z, $line if $line =~ /\S/ && $line !~ /^#/;
}
}
$PARAMS->{'naughty_words'} = \@z;
return \@z;
}
1;