home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
OS/2 Shareware BBS: 35 Internet
/
35-Internet.zip
/
countv01.zip
/
countHTTP.cmd
next >
Wrap
OS/2 REXX Batch file
|
1997-12-11
|
9KB
|
247 lines
extproc perl -x -S
#!/usr/bin/perl
# Parse PowerWeb HTTP log and calculate server statistics.
#
# countHTTP Version 0.1 [10/12/1997]
#
# Copyright (C) 1997 by Ivan Adzhubei
#
# Web address: http://www.protein.bio.msu.su/
# Send comments to: ivan@protein.bio.msu.su
# Month numbers: three-letter month abbreviation => zero-padded two-digit
# month number ('01' .. '12'), kept as strings.
%monthNum = ();
@monthNum{qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)} =
    map { sprintf '%02d', $_ } 1 .. 12;
# A few defaults: the log file to read, and the loopback address and host
# name (requests coming from these clients are skipped when the log is read).
($logFile, $localIP, $localHost) =
    ('/powerweb/logs/HTTP.log', '127.0.0.1', 'localhost');
# These are the IP address range and domain name of the clients whose requests
# are excluded from all statistics calculations. Useful for excluding your own
# hosts (which produce lots of requests while you test/configure PW) from the
# countHTTP results. Configurable via the command line options -en/-ed, but you
# may place defaults for your network right here in the code to avoid typing an
# excessively long command line each time you start countHTTP.
#
# *** UNCOMMENT AND EDIT THE TWO LINES BELOW ***
#
#$myNetwork = '111.222.333';
#$myDomain = 'mydomain.name.com';
# Parse command line options.
#
# Arguments are taken off @ARGV for as long as they start with '-'.  When the
# loop is left, $arg holds the argument that stopped it: the first one that
# is not an option, an ending-date argument such as -31/12/1997, or a false
# value when the command line is exhausted.

# Single-letter switches and the flag variable each of them turns on.
my %switchFor = (
    n => \$resolveName,
    l => \$printLoad,
    d => \$printDocs,
    c => \$printClients,
    s => \$strictHTML,
    t => \$printTotals,
);

while (($arg = shift @ARGV) && $arg =~ /^-/) {
    # -?, -h or -help: show the help screen and quit.
    if ($arg =~ /^-(\?|h(elp)?)$/) {
        printUsage();
        exit;
    }

    # Options that carry a value use up the whole argument.
    if ($arg =~ /^-f(.+)/) {
        $logFile = $1;
        next;
    }
    if ($arg =~ /^-en(.+)?/) {
        # A bare -en leaves $myNetwork undefined, i.e. clears any default.
        $myNetwork = $1;
        next;
    }
    if ($arg =~ /^-ed(.+)?/) {
        # Likewise a bare -ed clears $myDomain.
        $myDomain = $1;
        next;
    }

    # Switches may be combined in one argument (e.g. -dct): every switch
    # letter found anywhere in the argument is turned on.
    foreach my $letter (qw(n l d c s t)) {
        ${ $switchFor{$letter} } = 1 if index($arg, $letter) >= 0;
    }

    # Stop on -<date>, since we assume this is an ending date parameter.
    last if $arg =~ /^-\d+\/\d+\/\d+$/;
}
# Next arg is date range?
#
# A range is written dd/mm/yyyy-dd/mm/yyyy; either side may be left out but
# the '-' must be present.  Each given date is also packed into one number
# (every month counted as 31 days) so that dates can be compared numerically.
my ($rangeStart, $rangeEnd) = $arg =~ /^(\d+\/\d+\/\d+)?-(\d+\/\d+\/\d+)?$/;

# Yes, argument looks like a date range
if ($rangeStart || $rangeEnd) {
    ($firstDate, $lastDate) = ($rangeStart, $rangeEnd);

    if ($firstDate) {
        ($firstDay, $firstMonth, $firstYear) =
            map { $_ + 0 } split /\//, $firstDate;
        die "Illegal starting date format\n"
            unless $firstMonth >= 1 && $firstMonth <= 12
                && $firstDay >= 1 && $firstDay <= 31;
        $packedFirst = $firstYear * 12 * 31
                     + ($firstMonth - 1) * 31
                     + $firstDay - 1;
    }

    if ($lastDate) {
        ($lastDay, $lastMonth, $lastYear) =
            map { $_ + 0 } split /\//, $lastDate;
        die "Illegal ending date format\n"
            unless $lastMonth >= 1 && $lastMonth <= 12
                && $lastDay >= 1 && $lastDay <= 31;
        $packedLast = $lastYear * 12 * 31
                    + ($lastMonth - 1) * 31
                    + $lastDay - 1;
    }

    # The range argument is used up; it must not be taken for a tag regexp.
    $arg = '';
}

# Whatever follows on the command line becomes the tag regexp.
$arg = shift @ARGV if @ARGV;
# Read the log file and count the requests that pass all filters.
#
# Each log line is expected to look like
#     client server auth [dd/Mon/yyyy:hh:mm:ss ...] "METHOD document ..."
# Lines that do not fit are reported on STDERR and skipped.
#
# Results:
#   $totalRequests{document}{client}    hits per document and client
#   $totalLoad{year}{month}{day}{hour}  hits per hour of each day
#   $firstLog                           date (dd/mm/yyyy) of the first parsed line
#   $date                               date (dd/mm/yyyy) of the last parsed line

# The exclusion settings are plain addresses and names, not patterns, so they
# are escaped before being interpolated into regular expressions.
my $localIPPat   = quotemeta $localIP;
my $localHostPat = quotemeta $localHost;
my ($networkPat, $domainPat);
if ($myNetwork) {
    ($networkPat = $myNetwork) =~ s/\.$//;      # accept "111.222.333." too
    $networkPat = quotemeta $networkPat;
}
if ($myDomain) {
    ($domainPat = $myDomain) =~ s/^\.//;        # accept ".mydomain.com" too
    $domainPat = quotemeta $domainPat;
}

open(LOG,$logFile) || die "Can't open file \"$logFile\"\n";
while (<LOG>) {
    chomp;
    next if /^\s*$/;

    # Take the fields from the list returned by each match, never from
    # $1..$5 afterwards: after a failed match those variables do not
    # describe the current line.
    my @fields = /^(\S+)\s+(\S+)\s+(\S+)\s+\[(.+?)\]\s+"(.+?)"/;
    my ($reqDocument, $logDay, $logMonth, $logYear, $logTime);
    if (@fields) {
        ($reqDocument) = $fields[4] =~ /^\w+\s+(\S+)/;
        ($logDay, $logMonth, $logYear, $logTime) =
            $fields[3] =~ /^(\d\d)\/(\w+)\/(\d\d\d\d):(\d\d:\d\d:\d\d)/;
    }
    unless (defined $reqDocument && defined $logTime && $monthNum{$logMonth}) {
        print STDERR "WARNING! - Malformed log line:\n$_\n";
        next;                                   # do not count a line we cannot read
    }

    ($client, $server, $auth, $datetime, $request) = @fields;
    $document = $reqDocument;
    $time     = $logTime;
    $date     = "$logDay/$monthNum{$logMonth}/$logYear";
    $day      = $logDay + 0;
    $month    = $monthNum{$logMonth} + 0;
    $year     = $logYear + 0;
    $hour     = substr($logTime, 0, 2) + 0;

    $firstLog = $date unless $firstLog;

    # Requests from the server machine itself.
    next if $client =~ /^$localIPPat/ || $client =~ /^$localHostPat$/;

    # Requests from the excluded network: the prefix must end on an octet
    # boundary, so that 10.1.1 does not also exclude 10.1.10.x.
    next if $myNetwork && $client =~ /^$networkPat(?:\.|$)/;

    # Requests from the excluded domain: the name itself or any host in it.
    next if $myDomain && $client =~ /(?:^|\.)$domainPat$/i;

    # Strict HTML mode: keep only directory requests and .htm/.html pages.
    # The '$' in the character class is escaped; written as "$&" it would be
    # interpolated as the text of the last successful match.
    next if $strictHTML && $document !~ /^[^\$&?=]*?\/(\w+(\.htm(l)?)?)?$/i;
    next if $strictHTML && $document =~ /\/(cgi-bin|perl-bin|rexx-bin)\//i;

    # Date range filter, using the same packing as the command line dates
    # (every month counted as 31 days).
    $packedDate = $year * 12 * 31 + ($month - 1) * 31 + $day - 1
        if $firstDate || $lastDate;
    next if ($firstDate && $packedDate < $packedFirst) ||
            ($lastDate && $packedDate > $packedLast);

    # Tag filter: $arg is a regular expression supplied by the user.
    next if $arg && $document !~ /$arg/i;

    $totalRequests{$document}{$client}++;
    $totalLoad{$year}{$month}{$day}{$hour}++;
}
close(LOG);
# Add up the per-document and per-client totals and, with -n, replace client
# addresses by host names.
#
# Results:
#   $clientName{client}    name to print for a client: the resolved host
#                          name, or the address itself when it was not or
#                          could not be resolved
#   $documentCount{doc}    hits per document
#   $clientCount{name}     hits per client name
#   $totRequests           grand total of counted hits
print STDERR "Resolving client names" if $resolveName;
foreach $document (keys %totalRequests) {
    foreach $client (keys %{$totalRequests{$document}}) {
        print STDERR '.' if $resolveName;

        # Resolve every client only once, however many documents it asked
        # for; each nslookup run is slow.
        unless ($clientName{$client}) {
            $cliName = '';
            # The client string comes straight from the log file, so it is
            # handed to the shell only if it looks like an address or a
            # host name.
            if ($resolveName && $client =~ /^[A-Za-z0-9:][A-Za-z0-9.:_-]*$/) {
                @lookupResults = `nslookup $client 2>nul`;
                foreach $line (@lookupResults) {
                    chomp($line);
                    $cliName = $1 if $line =~ /^Name:\s+(\S+)/;
                }
            }
            $clientName{$client} = $cliName || $client;
        }

        $documentCount{$document} += $totalRequests{$document}{$client};
        $clientCount{$clientName{$client}} += $totalRequests{$document}{$client};
        $totRequests += $totalRequests{$document}{$client};
    }
}
print STDERR "\n" if $resolveName;
# Document report (-d): documents in descending order of hits, each followed
# by its clients (left out with -t) and a subtotal.
if ($printDocs) {
    my @docsByHits =
        sort { $documentCount{$b} <=> $documentCount{$a} } keys %documentCount;
    foreach my $doc (@docsByHits) {
        print "$doc:\n";
        if (!$printTotals) {
            my $hitsBy = $totalRequests{$doc};
            my @clientsByHits =
                sort { $hitsBy->{$b} <=> $hitsBy->{$a} } keys %{$hitsBy};
            foreach my $who (@clientsByHits) {
                printf " %-36s%5s\n", $clientName{$who}, '(' . $hitsBy->{$who} . ')';
            }
        }
        printf " Subtotal:%32s\n", '(' . $documentCount{$doc} . ')';
    }
}

# Client report (-c): clients in descending order of their total hits.
if ($printClients) {
    my @namesByHits =
        sort { $clientCount{$b} <=> $clientCount{$a} } keys %clientCount;
    foreach my $name (@namesByHits) {
        printf "%-36s%5s\n", $name, '(' . $clientCount{$name} . ')';
    }
}
# Print the grand total for the reporting period.  A side of the date range
# that was not given on the command line is taken from $firstLog / $date.
$firstDate = $firstLog unless $firstDate;
$lastDate = $date unless $lastDate;

# When no request was counted $totRequests was never set; report that as 0
# instead of printing an empty count.
$totRequests = 0 unless $totRequests;

print "\n------\nTotal ", $strictHTML ? 'HTML ' : '', "requests $firstDate - $lastDate";
print " for '$arg'" if $arg;
print ": $totRequests\n";

# Everything below is the load report, printed only with -l.
exit unless $printLoad;
# Server load report.
#
# First fold the per-hour hit counts into
#   $monthLoad{year}{month}   hits in each month
#   $hourLoad{hour}           hits in each hour of the day, over all days
#   $hourList{hour}           number of days that had hits in that hour
foreach $year (keys %totalLoad) {
    foreach $month (keys %{$totalLoad{$year}}) {
        foreach $day (keys %{$totalLoad{$year}{$month}}) {
            my $hitsByHour = $totalLoad{$year}{$month}{$day};
            foreach $hour (keys %{$hitsByHour}) {
                $monthLoad{$year}{$month} += $hitsByHour->{$hour};
                $hourLoad{$hour} += $hitsByHour->{$hour};
                $hourList{$hour}++;
            }
        }
    }
}

# Average hits for every hour of the day; $totLoad is the sum of the 24
# averages and $maxLoad the largest of them.
for $hour (0..23) {
    $hourList{$hour}++ unless $hourList{$hour};     # idle hour: divide by 1, not 0
    $aveLoad = $hourLoad{$hour} / $hourList{$hour};
    $maxLoad = $aveLoad > $maxLoad ? $aveLoad : $maxLoad;
    $totLoad += $aveLoad;
    push @averageLoad, $aveLoad;
}

# Height of the chart in percent.  With no counted request $totLoad is 0;
# the chart is then left empty instead of dividing by zero.
$maxLoad = $totLoad ? $maxLoad / $totLoad * 100 : 0;

print "\n------\nServer daily load (percent/hour):\n\n";
# One row per percent, highest first; a row consists of a 5-character
# label followed by one column for each of the 24 hours.
for ($percent=int($maxLoad+0.5); $percent>0; $percent--) {
    printf("%5s",($percent % 5) ? '|' : "$percent-|");
    for $hour (0..23) {
        $percentLoad = $averageLoad[$hour] / $totLoad * 100;
        print(int($percentLoad+0.5) >= $percent ? '*' : '.');
    }
    print "\n";
}
# Base line and hour scale, generated to fit the row layout above so that
# the tick marks stand exactly under the columns of hours 0, 3, ... 21.
printf "%5s%s\n", '0-|', '-' x 24;
print ' ' x 5, join('  ', ('|') x 8), "\n";
print ' ' x 5, join('', map { sprintf '%-3s', $_ * 3 } 0 .. 6), "21\n";

print "\n------\nServer load profile (hits/month):\n\n";
foreach $year (sort {$a<=>$b} keys %monthLoad) {
    foreach $month (sort {$a<=>$b} keys %{$monthLoad{$year}}) {
        printf "%d/%02d:%8d\n", $year, $month, $monthLoad{$year}{$month};
    }
}
# Print the command line help screen on standard output.
# Takes no arguments; the caller decides whether to exit afterwards.
sub printUsage {
    # The text contains nothing to interpolate, so it is quoted literally.
    my $helpText = <<'EOT';
countHTTP for PowerWeb++ Server Version 0.1 [10/12/1997]
usage: countHTTP [options] [date_range] [tag_regexp]
[date_range] is in form: dd/mm/yyyy-dd/mm/yyyy, either start or end date
is optional, but '-' separator is mandatory;
[tag_regexp] is Perl style regular expression, only matching documents are
counted; must be protected from shell in usual way (eg. quoted).
[options] are from among:
-h(elp) show (this) help screen; -? also works.
-s strict HTML mode, only requests ending in .htm(l) are counted,
form GET/POST's, requests for graphics, etc., are ignored.
-flogFile specify log file name (inc. path); default is to open HTTP.log
in /powerweb/logs dir on the current drive.
-enNET_IP exclude NET_IP range (eg. 111.222.333) from all statistics.
-edDOMAIN exclude DOMAIN name (eg. mydomain.com) from all statistics;
set both -e options to your own net/domain to exclude all of your
own testing/configuration requests to server from statistics.
-n resolve and print client names instead of IP addresses; may take
really LONG time for large log files with many requests.
-l print server load statistics: average daily and per month.
-d print document requests statistics: document hits per client.
-c print client statistics: total hits per each client.
-t print only totals/subtotals, less detailed output.
EOT
    print $helpText;
}