home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Programming Languages Suite
/
ProgLangD.iso
/
VCAFE.3.0A
/
Sample.bin
/
LogFile.java
< prev
next >
Wrap
Text File
|
1998-11-05
|
31KB
|
706 lines
// Copyright (c) 1997, 1998 Symantec, Inc. All Rights Reserved.
/*
Parses Common Log Format (CLF), the Extended Log Format, and
Microsoft Professional Internet Services log file format (MPIS)
---- The Common Logfile Format ----
The common logfile format is as follows:
remotehost rfc931 authuser [date] "request" status bytes
remotehost
Remote hostname (or IP number if the DNS hostname is not available, or if DNSLookup is
Off).
rfc931
The remote logname of the user.
authuser
The username as which the user has authenticated himself.
[date]
Date and time of the request.
"request"
The request line exactly as it came from the client.
status
The HTTP status code returned to the client.
bytes
The content-length of the document transferred.
---- The Extended Logfile Format adds ----
referringURL
The URL of the previously retrieved page (the referrer)
userAgent
program generating the HTTP requests
---- MPIS Format ----
Client IP Address,
Client Username,
Date,
Time,
Service,
Computer Name,
IP address of server,
Processing time (ms),
Bytes received,
Bytes sent,
Service status code,
Windows NT status code,
Name of operation,
Target of operation
*/
import java.net.URL;
import java.net.URLConnection;
import java.io.Serializable;
import java.io.File;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.BufferedReader;
import java.util.Enumeration;
import java.util.NoSuchElementException;
import java.util.Vector;
import java.util.Date;
import java.util.Calendar;
/*
This class represents a single log file.
It tracks the log file URL and expected format.
It parses the log file and saves the result in an array of LogRecords.
*/
class LogFile implements Serializable {
// Calendar and month strings used for converting log timestamps into milliseconds.
// cal is created once with the default time zone and locale and, being static,
// is shared by every LogFile instance.
static Calendar cal = Calendar.getInstance();
// Three-letter English month abbreviations; the array index of a name is its
// zero-based month number (0 == Jan).
static final String MONTHS[] = {
"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};
// Possible log file formats (the legal values of logFileFormat)
public static final int FORMAT_AUTO_DETECT = 0; // accept whichever supported format a line turns out to be
public static final int FORMAT_COMMON = 1; // Common Log Format only
public static final int FORMAT_EXTENDED = 2; // Common Log Format plus referring URL and user agent
public static final int FORMAT_MPIS = 3; // comma-separated Microsoft (NT) format
// Always present data. Defines log file this class represents
String logFileURL; // the URL of the log file
int logFileFormat; // the expected log file format: one of the FORMAT_ constants above
// Parsed log file records. Call readAndParseLogFile() to create.
// Null until then, and null again after freeUpMem().
LogRecord[] records;
// Info determined when log file is read and parsed.
// readAndParseLogFile() resets these to Long.MAX_VALUE and 0 before each scan.
long earliestTimestamp;
long latestTimestamp;
/*
Constructs a LogFile.
Nothing is initialised here: logFileURL starts out null and logFileFormat
starts out 0 (FORMAT_AUTO_DETECT) until the setters are called.
The INIT_CONTROLS marker comments below look tool-generated (presumably
by the Visual Cafe designer - TODO confirm) and are kept as they are.
*/
public LogFile() {
//{{INIT_CONTROLS
//}}
}
/*
Releases the parsed records so they can be garbage collected.
The URL, the format and the timestamp range are kept; call
readAndParseLogFile() again to rebuild the records.
*/
void freeUpMem() {
    this.records = null;
}
// -- ACCESS ROUTINES
// Returns the URL of the log file this object represents (null until one is set).
public String getLogFileURL() {
    return this.logFileURL;
}
// Sets the URL the log file will be read from.
// The string is stored as given; it is only turned into a java.net.URL
// (and so only checked for being well-formed) when the file is parsed.
public void setLogFileURL(String logFileURL) {
this.logFileURL = logFileURL;
}
// Returns the expected log file format: one of the FORMAT_ constants.
public int getLogFileFormat() {
    return this.logFileFormat;
}
// Sets the expected log file format. The value should be one of the FORMAT_
// constants; it is stored without being range-checked.
public void setLogFileFormat(int logFileFormat) {
this.logFileFormat = logFileFormat;
}
/*
Reads the log file and parses it into LogRecords.
This must be called before using any of the get... routines below.
Any result of an earlier scan is discarded first. The returned array is
the same one getRecords() hands out afterwards.
A ParseLogException from the parser is passed on to the caller; in that
case records stays null.
*/
public LogRecord[] readAndParseLogFile(TrackProgress trackProgress) throws ParseLogException {
    // Forget whatever an earlier scan left behind
    records = null;
    latestTimestamp = 0;
    earliestTimestamp = Long.MAX_VALUE;
    // A fresh parser fills in records and the timestamp range
    new Parser().parse(trackProgress);
    return records;
}
/*
Returns the LogRecords produced by the last readAndParseLogFile() call.
The result is null if the file has not been parsed yet or if
freeUpMem() has been called since.
*/
public LogRecord[] getRecords() {
    return this.records;
}
/*
Returns the total number of hits (parsed records) in the log file.
Only meaningful after readAndParseLogFile(). When no records are held
(not parsed yet, parsing failed, or freeUpMem() was called) this returns 0
instead of failing with a NullPointerException.
*/
public int getTotalHits() {
    return (records == null) ? 0 : records.length;
}
/*
Returns the earliest request timestamp found in the log file, in milliseconds.
Only meaningful after readAndParseLogFile(); the value is Long.MAX_VALUE
when that scan accepted no line.
*/
public long getEarliestTimestamp() {
    return this.earliestTimestamp;
}
/*
Returns the latest request timestamp found in the log file, in milliseconds.
Only meaningful after readAndParseLogFile(); the value is 0 when that
scan accepted no line.
*/
public long getLatestTimestamp() {
    return this.latestTimestamp;
}
// -- The Log File Parser
class Parser {
// This class buffers/stores the result of parsing one line of the log file.
// A single instance is reused for every line; the Parser overwrites the fields
// each time and a LogRecord is constructed from it after a successful parse.
class LogParseRecord {
// Parse line results
public StringBuffer remoteHost = new StringBuffer(); // host name or IP number of the client
public StringBuffer remoteUserLogname = new StringBuffer(); // remote logname of the user (rfc931 field)
public StringBuffer authenticatedUserName = new StringBuffer(); // name the user authenticated as (authuser field)
public long requestTimestamp; // date/time of the request, in milliseconds
public StringBuffer clientRequest = new StringBuffer(); // the request line, without its surrounding quotes
public int httpStatusCode; // HTTP status code returned to the client
public int bytesTransferred; // length of the transfer; 0 when the log shows "-"
public StringBuffer referringURL = new StringBuffer(); // referring URL (only present in the extended format)
public StringBuffer userAgent = new StringBuffer(); // browser/agent info (only present in the extended format)
public boolean bExtraText; // There is extra text at the end of this line (ignored)
}
// This class buffers/stores the result of parsing one line of the log file (NT).
// Fields are listed in the order the comma-separated values appear on a line;
// the date and time columns are combined into requestTimestamp.
class LogParseRecordMPIS {
// Parse line results
public StringBuffer clientIPAddress = new StringBuffer(); // client IP address
public StringBuffer clientUserName = new StringBuffer(); // client user name ("-" when none was logged)
public long requestTimestamp; // date + time columns, in milliseconds
public StringBuffer serviceName = new StringBuffer(); // service, e.g. W3SVC or MSFTPSVC; only W3SVC lines are kept
public StringBuffer computerName = new StringBuffer(); // computer name of the server
public StringBuffer serverIPAddress = new StringBuffer(); // IP address of the server
public int processingMilliseconds; // processing time (ms)
public int bytesReceived; // bytes received
public int bytesSent; // bytes sent; becomes bytesTransferred of the standard record
public int serviceStatusCode; // service status code; becomes httpStatusCode of the standard record
public int ntStatusCode; // Windows NT status code
public StringBuffer operationName = new StringBuffer(); // name of the operation, e.g. GET (may contain spaces)
public StringBuffer operationTarget = new StringBuffer(); // target of the operation, e.g. the requested path
public StringBuffer dash = new StringBuffer(); // always seems to be a dash...
public StringBuffer blank = new StringBuffer(); // always seems to be blank
public boolean bExtraText; // There is extra text at the end of this line (ignored)
}
// Scanning/parsing vars
// Reader over the log file; opened by parse(). getNextQuotedString() also reads
// from it when a quoted string continues on the following line.
BufferedReader logReader;
String scanStr; // line currently being parsed (grows when continuation lines are appended)
int scanIdx; // index into line currently being parsed
StringBuffer workBuf = new StringBuffer(); // general use (scratch buffer for numbers and MPIS date/time text)
LogParseRecord logParseRec = new LogParseRecord(); // Results of parsing a line
// NT log file format support
StringBuffer workBuf2 = new StringBuffer(); // general use (not referenced by the code visible in this file)
LogParseRecordMPIS logParseRecMPIS = new LogParseRecordMPIS(); // Results of parsing a line (NT)
/*
This method reads and parses the log file named by logFileURL.
Every line that parses successfully becomes a LogRecord. Once the whole
file has been read, the records are copied into the enclosing LogFile's
records array and sorted with WLAUtil.quickSort() (chronological order,
according to the original author's note). earliestTimestamp and
latestTimestamp are updated as records are accepted.
Lines of four characters or fewer are skipped, as are lines for which
parseALine() returns false.
User feedback is provided using the given trackProgress interface object:
steps 1-4 cover the set-up, 5-95 the parsing itself.
A ParseLogException is thrown if the URL is malformed, if an I/O error
occurs, or if an ill-formed line is reported to the user and the user
chooses to abort.
The log reader is closed before this method returns or throws.
*/
public void parse(TrackProgress trackProgress) throws ParseLogException {
    // Length assumed when the connection reports no usable content-length
    final int assumedContentLength = 100000;
    int lineCount = 0;
    // Collect garbage mem before doing this potentially memory-intensive task
    System.gc();
    Vector recordVector = new Vector();
    try {
        // Create URL
        Analyzer.throwExceptionIfCurrentThreadCancelled();
        trackProgress.step("Creating URL...", 1);
        URL url = new URL(logFileURL);
        // Get connection to log file
        Analyzer.throwExceptionIfCurrentThreadCancelled();
        trackProgress.step("Opening connection...", 2);
        URLConnection con = url.openConnection();
        // Get input stream of log file
        Analyzer.throwExceptionIfCurrentThreadCancelled();
        trackProgress.step("Accessing log file...", 3);
        InputStream is = con.getInputStream();
        logReader = new BufferedReader(new InputStreamReader(is));
        // Get length of log file (to calc percentage complete)
        Analyzer.throwExceptionIfCurrentThreadCancelled();
        trackProgress.step("Parsing log file...", 4);
        int contentLength = con.getHeaderFieldInt("content-length", assumedContentLength);
        if (contentLength <= 0) {
            // A reported length of zero would make the percentage divide by zero
            contentLength = assumedContentLength;
        }
        // Loop, reading a line from the log file and parsing it
        long contentRead = 0; // long: large logs overflow an int once multiplied by 90
        String aLine;
        for (;;) {
            Analyzer.throwExceptionIfCurrentThreadCancelled();
            aLine = logReader.readLine();
            if (aLine == null) {
                break;
            }
            // percent for this phase ranges from 5 -> 95
            contentRead += aLine.length() + 1;
            trackProgress.step("Parsing log file...", percentParsed(contentRead, contentLength));
            // Process the next log line
            lineCount++;
            // skip blank lines...and ones that are just too short
            if (aLine.length() <= 4) {
                continue;
            }
            try {
                // Clear the optional fields up front, so that a line that failed
                // half-way through cannot leak its values into this record
                logParseRec.referringURL.setLength(0);
                logParseRec.userAgent.setLength(0);
                // Parse the log file line
                if (!parseALine(aLine)) {
                    // ignore this line
                    continue;
                }
                // Add this log file line info to the records vector
                recordVector.addElement(new LogRecord(logParseRec));
                // maintain earliest timestamp info
                if (logParseRec.requestTimestamp < earliestTimestamp) {
                    earliestTimestamp = logParseRec.requestTimestamp;
                }
                // maintain latest timestamp info
                if (logParseRec.requestTimestamp > latestTimestamp) {
                    latestTimestamp = logParseRec.requestTimestamp;
                }
            } catch (ParseLogException x) {
                // Bad log line encountered. Either silently ignore or give user option.
                Data data = Data.getDataInstance();
                if (!data.ignoreUnexpectedLogFileErrors) {
                    String msg = "Ill-formed log line #" + lineCount
                        + " (" + x.getMessage()
                        + "). Press OK to ignore, Cancel to abort.";
                    // Show alert and abort if requested
                    if (!trackProgress.okCancelAlert(msg)) {
                        trackProgress.step("ABORTED", 0);
                        throw x;
                    }
                }
            }
        }
        // The log file has been read and parsed into a vector of LogRecords.
        // Now copy them into an array and sort it.
        records = new LogRecord[recordVector.size()];
        recordVector.copyInto(records);
        recordVector = null; // don't need this vector anymore
        Analyzer.throwExceptionIfCurrentThreadCancelled();
        WLAUtil.quickSort(records);
    } catch (java.net.MalformedURLException x) {
        trackProgress.okAlert(x.toString());
        trackProgress.step("ABORTED", 0);
        throw new ParseLogException("Malformed URL", x);
    } catch (java.io.IOException x) {
        trackProgress.okAlert(x.toString());
        trackProgress.step("ABORTED", 0);
        throw new ParseLogException("IO Exception", x);
    } finally {
        // Release the connection's stream on every exit path
        closeLogReader();
    }
}
// Maps the number of characters read so far onto the 5..95 percent band of the
// parsing phase. The result is capped at 95 because the expected length may be
// only a guess (or smaller than what is actually delivered).
private int percentParsed(long contentRead, int contentLength) {
    long scaled = (90L * contentRead) / contentLength;
    if (scaled > 90L) {
        scaled = 90L;
    }
    return 5 + (int) scaled;
}
// Closes the log reader, if one is open, and forgets it.
private void closeLogReader() {
    if (logReader != null) {
        try {
            logReader.close();
        } catch (java.io.IOException ignored) {
            // the data has already been read (or the parse already failed);
            // a failure to close changes nothing for the caller
        }
        logReader = null;
    }
}
// COMMON LOG FORMAT
//syntax: remotehost rfc931 authuser [date] "request" status bytes
//155.64.199.67 - - [06/Apr/1998:22:51:45 +0000] "GET /index.htm HTTP/1.0" 200 2596
// EXTENDED FORMAT
//syntax: remotehost rfc931 authuser [date] "request" status bytes referringURL userAgent
// MS NT FORMAT
//syntax?: remotehost, rfc931, date, time, servicename, appname, somedomain, #1, #2, #3, status#, #5, theRequest, file, dash?, blank?
//155.64.199.156, -, 4/2/98, 18:20:19, W3SVC, VPAGE2, 155.64.35.204, 712224, 271, 28796, 200, 0, GET, /jeannette/text.htm, -,
//155.64.198.182, guest@unknown, 4/2/98, 18:33:55, MSFTPSVC, VPAGE2, -, 190, 916, 0, 0, 0, [64] created , itnews01.htm, -,
//155.64.198.182, bbubb, 4/2/98, 18:42:16, MSFTPSVC, VPAGE2, -, 190, 921, 0, 0, 0, [68] created , itnews01.htm, -,
/*
Parses the given log file line into the logParseRec object.
A line whose first comma comes before its first space is treated as the
Microsoft (NT) format and handed to parseALineMPIS(); everything else is
parsed as Common Log Format, optionally followed by the two Extended Log
Format fields.
Returns true when logParseRec holds a usable record, false when the line
should be ignored (only the MPIS branch ever returns false).
Throws ParseLogException when the line does not match the format selected
by logFileFormat, or ends before all expected fields were read.
*/
private boolean parseALine(String aLine) throws ParseLogException {
    // init vars used in the getNext...() methods
    scanIdx = 0;
    scanStr = aLine;
    // Determine if NT logfile format
    int idx = aLine.indexOf(' ');
    int idx2 = aLine.indexOf(',');
    if ((idx != -1) && (idx2 != -1) && (idx2 < idx)) {
        // Is NT Logfile format
        if ((logFileFormat != FORMAT_AUTO_DETECT) && (logFileFormat != FORMAT_MPIS)) {
            // err...
            String str = (logFileFormat == FORMAT_COMMON) ? "not Common Log Format" : "not Extended Log Format";
            throw new ParseLogException(str, null);
        }
        return parseALineMPIS(aLine);
    }
    try {
        // host name or IP number
        getNextWord(logParseRec.remoteHost);
        // remote user name
        getNextWord(logParseRec.remoteUserLogname);
        // authenticated user name
        getNextWord(logParseRec.authenticatedUserName);
        // date/time of request
        logParseRec.requestTimestamp = getNextTimestamp();
        // the request
        getNextQuotedString(logParseRec.clientRequest);
        // the request return http status
        logParseRec.httpStatusCode = getNextInt();
        // the length of the transfer
        logParseRec.bytesTransferred = getNextInt();
        // Determine if we're at the end of the line...
        // (measured against scanStr, not aLine: a quoted string that ran over
        // a line break has had the following line(s) appended to scanStr)
        logParseRec.bExtraText = scanIdx < scanStr.length();
        // End of Common Log Format. Check for log format compliance
        if (logFileFormat == FORMAT_COMMON) {
            // Expecting CLF specifically, no extra text allowed
            if (logParseRec.bExtraText) {
                throw new ParseLogException("extra text at end of log line - not Common Log Format", null);
            }
            // All OK and done with this line
            return true;
        }
        // If auto-detect format and ends here, OK
        if (logFileFormat == FORMAT_AUTO_DETECT && !logParseRec.bExtraText) {
            return true;
        }
        // Either auto with extra text or extended log format.
        // Either way, continue parsing
        getNextQuotedString(logParseRec.referringURL);
        // browser info
        getNextQuotedString(logParseRec.userAgent);
        // Determine if we're at the end of the line...
        logParseRec.bExtraText = scanIdx < scanStr.length();
        // If Extended specifically requested, no extra text allowed
        if (logFileFormat == FORMAT_EXTENDED && logParseRec.bExtraText) {
            throw new ParseLogException("extra text at end of log line - not Extended Log Format", null);
        }
        // All OK
    } catch (IndexOutOfBoundsException x) {
        // String.charAt()/substring() report running off the end of the line with
        // StringIndexOutOfBoundsException, so the common supertype is caught here
        throw new ParseLogException("unexpected end of line");
    }
    return true;
}
/*
Copies the next space-delimited "word" of the current line into destBuf.
The word starts at scanStr[scanIdx] and runs up to the next space found
after that first character, or to the end of the line when there is none.
On exit scanIdx is one past the delimiting space (or past end of line).
*/
private void getNextWord(StringBuffer destBuf) throws ParseLogException {
    final int start = scanIdx;
    // locate the delimiter; the first character always belongs to the word
    final int space = scanStr.indexOf(' ', start + 1);
    final int stop = (space < 0) ? scanStr.length() : space;
    // replace the buffer contents with the word
    destBuf.setLength(0);
    destBuf.append(scanStr.substring(start, stop));
    // resume scanning just behind the delimiter
    scanIdx = stop + 1;
}
/*
Reads the next word of the current line and converts it to an int.
A lone "-" stands for "no value" and yields 0.
scanIdx is advanced as described for getNextWord().
Throws ParseLogException ("invalid number format") for any other
non-numeric word.
*/
private int getNextInt() throws ParseLogException {
    getNextWord(workBuf);
    final String text = workBuf.toString();
    // a single dash is how the log writes a missing number
    if ("-".equals(text)) {
        return 0;
    }
    try {
        return Integer.parseInt(text);
    } catch (NumberFormatException x) {
        throw new ParseLogException("invalid number format", x);
    }
}
/*
Parses a double-quoted string in the current line into destBuf (without
the surrounding quotes).
The string ends at a '"' that is followed by a space or by the end of the
line; a '"' followed by anything else is taken as part of the string.
If the line ends before a closing quote is found, the next line of the log
file is read and appended to scanStr (joined with "\n") and the search
continues.
On entry scanStr[scanIdx] always should be a '"' character.
On exit scanIdx is one past the character following the closing quote
(or past end of line).
Throws ParseLogException if the line is already exhausted, if the opening
quote is missing, if reading a continuation line fails, or if the file
ends before the closing quote.
*/
private void getNextQuotedString(StringBuffer destBuf) throws ParseLogException {
    // Nothing left on the line: report it as a parse error, not a runtime error
    if (scanIdx >= scanStr.length()) {
        throw new ParseLogException("unexpected end of line");
    }
    // Check for expected initial quote
    if (scanStr.charAt(scanIdx) != '"') {
        throw new ParseLogException("expecting starting quote");
    }
    // skip leading quote
    scanIdx++;
    // find end of string, signalled by quote followed by a space
    int endIdx = scanIdx;
    do {
        // determine end of quote.
        // If newline within quote, append next line to quoted string..
        int wasEndIdx = endIdx;
        for (;;) {
            endIdx = scanStr.indexOf('"', endIdx);
            if (endIdx >= 0) {
                break;
            }
            String appendStr;
            try {
                appendStr = logReader.readLine();
            } catch (java.io.IOException x) {
                throw new ParseLogException("IO exception", x);
            }
            if (appendStr == null) {
                throw new ParseLogException("EOF instead of ending quote");
            }
            scanStr += "\n" + appendStr;
            // try again
            endIdx = wasEndIdx;
        }
        // The length is re-read on every pass: scanStr may just have grown, and a
        // stale limit would accept an embedded quote as the closing one
    } while (++endIdx < scanStr.length() && scanStr.charAt(endIdx) != ' ');
    // extract substring into destBuf (endIdx is one past the closing quote)
    destBuf.setLength(0);
    destBuf.append(scanStr.substring(scanIdx, endIdx - 1));
    // keep track of where we are scanning
    scanIdx = endIdx + 1;
}
// Calendar fixed to GMT and owned by this parser. The fields of a log timestamp
// are loaded into it as if they were GMT and the logged zone offset is then
// subtracted, so the result does not depend on the machine's own time zone or
// daylight-saving rules.
private final Calendar gmtCal = Calendar.getInstance(java.util.TimeZone.getTimeZone("GMT"));
/*
Parses a log timestamp in the current line (like: [02/Feb/1998:02:52:36 -0800]).
Returns the instant as milliseconds since the epoch (UTC), with the
millisecond part always zero.
The zone is read as an optional '+' or '-' followed by two hour digits and
two minute digits.
On entry scanStr[scanIdx] always should be a '[' character.
On exit scanIdx is two past the closing ']' (i.e. behind the space that
normally follows it), or past end of line.
Throws ParseLogException if the brackets are missing, a numeric part or the
month name cannot be read, or the timestamp is shorter than expected.
*/
private long getNextTimestamp() throws ParseLogException {
    // -- Gather timestamp
    if (scanIdx >= scanStr.length()) {
        throw new ParseLogException("unexpected end of line");
    }
    // Check for expected initial bracket
    if (scanStr.charAt(scanIdx) != '[') {
        throw new ParseLogException("expecting starting square bracket");
    }
    // skip leading square bracket
    scanIdx++;
    // find end of timestamp
    int endIdx = scanStr.indexOf(']', scanIdx);
    if (endIdx < 0) {
        throw new ParseLogException("EOL instead of ending square bracket");
    }
    // note start of timestamp
    int idx = scanIdx;
    // keep track of where we are scanning
    scanIdx = endIdx + 2;
    // -- Parse the timestamp: 02/Feb/1998:02:52:36 -0800
    try {
        // Convert parts of timestamp
        int day = Integer.parseInt(scanStr.substring(idx, idx + 2));
        int month = MonthStringToInt(scanStr.substring(idx + 3, idx + 6));
        int year = Integer.parseInt(scanStr.substring(idx + 7, idx + 11));
        int hour = Integer.parseInt(scanStr.substring(idx + 12, idx + 14));
        int min = Integer.parseInt(scanStr.substring(idx + 15, idx + 17));
        int sec = Integer.parseInt(scanStr.substring(idx + 18, idx + 20));
        int zoneSign = 1;
        // The sign is consumed by hand so that the digits are always read as
        // exactly two hour digits plus two minute digits, signed or not
        int i = idx + 21;
        char zoneSignChar = scanStr.charAt(i);
        if (zoneSignChar == '+') {
            i++;
        } else if (zoneSignChar == '-') {
            i++;
            zoneSign = -1;
        }
        int zoneHour = Integer.parseInt(scanStr.substring(i, i + 2));
        int zoneMin = Integer.parseInt(scanStr.substring(i + 2, i + 4));
        // Combine parts into whole. clear() first: set() leaves every field it
        // is not given (notably MILLISECOND) at its previous value
        gmtCal.clear();
        gmtCal.set(year, month, day, hour, min, sec);
        // Offset of the logged zone from UTC
        int zoneAdj = zoneSign * (zoneHour * (int)WLAUtil.MILLISECS_PER_HOUR + zoneMin * (int)WLAUtil.MILLISECS_PER_MINUTE);
        // logged local time minus its offset gives UTC
        return gmtCal.getTime().getTime() - zoneAdj;
    } catch (NumberFormatException x) {
        throw new ParseLogException("invalid number format", x);
    } catch (IndexOutOfBoundsException x) {
        // timestamp shorter than the fixed layout read above
        throw new ParseLogException("unexpected end of line", x);
    }
}
// Converts a month name as found in MONTHS (compared ignoring case) into its
// month number (0 == Jan). Throws ParseLogException for any other string.
private int MonthStringToInt(String monthStr) throws ParseLogException {
    int month = 0;
    // walk the table until the name matches or the table is exhausted
    while (month < MONTHS.length && !monthStr.equalsIgnoreCase(MONTHS[month])) {
        month++;
    }
    if (month < MONTHS.length) {
        return month;
    }
    throw new ParseLogException("unrecognized month: " + monthStr);
}
// -------- MPIS FORMAT --------
// MS NT FORMAT
//syntax?: remotehost, rfc931, date, time, servicename, appname, somedomain, #1, #2, #3, #4, #5, theRequest, file, dash?, blank?
//155.64.199.156, -, 4/2/98, 18:20:19, W3SVC, VPAGE2, 155.64.35.204, 712224, 271, 28796, 200, 0, GET, /jeannette/text.htm, -,
//155.64.198.182, guest@unknown, 4/2/98, 18:33:55, MSFTPSVC, VPAGE2, -, 190, 916, 0, 0, 0, [64] created , itnews01.htm, -,
//155.64.198.182, bbubb, 4/2/98, 18:42:16, MSFTPSVC, VPAGE2, -, 190, 921, 0, 0, 0, [68] created , itnews01.htm, -,
/*
Parses the given Microsoft (NT) format log line into logParseRecMPIS and,
for HTTP lines, converts the result into the standard logParseRec record.
Returns true when logParseRec holds a usable record, false when the line
belongs to a service other than W3SVC and should be ignored.
Throws ParseLogException when a field cannot be read, when text follows
the last expected field, or when the line ends too early.
*/
private boolean parseALineMPIS(String aLine) throws ParseLogException {
    // init vars used in the getNext...() methods
    scanIdx = 0;
    scanStr = aLine;
    try {
        getNextWordMPIS(logParseRecMPIS.clientIPAddress);
        getNextWordMPIS(logParseRecMPIS.clientUserName);
        logParseRecMPIS.requestTimestamp = getNextTimestampMPIS();
        getNextWordMPIS(logParseRecMPIS.serviceName);
        getNextWordMPIS(logParseRecMPIS.computerName);
        getNextWordMPIS(logParseRecMPIS.serverIPAddress);
        logParseRecMPIS.processingMilliseconds = getNextIntMPIS();
        logParseRecMPIS.bytesReceived = getNextIntMPIS();
        logParseRecMPIS.bytesSent = getNextIntMPIS();
        logParseRecMPIS.serviceStatusCode = getNextIntMPIS();
        logParseRecMPIS.ntStatusCode = getNextIntMPIS();
        getWordsTillComma(logParseRecMPIS.operationName);
        getNextWordMPIS(logParseRecMPIS.operationTarget);
        getNextWordMPIS(logParseRecMPIS.dash);
        getNextWordMPIS(logParseRecMPIS.blank);
        // Determine if we're at the end of the line...
        logParseRecMPIS.bExtraText = scanIdx < aLine.length();
        // The NT format has a fixed number of fields, no extra text allowed
        if (logParseRecMPIS.bExtraText) {
            throw new ParseLogException("extra text at end of log line - not Windows NT format", null);
        }
        // Ignore non-HTTP log lines
        if (!logParseRecMPIS.serviceName.toString().equals("W3SVC")) {
            return false;
        }
        // Convert NT info into std info.
        // The text is copied (as is done for clientRequest) rather than sharing
        // the MPIS buffers, so the two records stay independent of each other
        logParseRec.remoteHost.setLength(0);
        logParseRec.remoteHost.append(logParseRecMPIS.clientIPAddress.toString());
        logParseRec.remoteUserLogname.setLength(0);
        logParseRec.remoteUserLogname.append(logParseRecMPIS.clientUserName.toString());
        logParseRec.authenticatedUserName.setLength(0);
        logParseRec.requestTimestamp = logParseRecMPIS.requestTimestamp;
        // Rebuild a request line of the form "<operation> <target> HTTP/1.0"
        logParseRec.clientRequest.setLength(0);
        logParseRec.clientRequest.append(logParseRecMPIS.operationName);
        logParseRec.clientRequest.append(' ');
        logParseRec.clientRequest.append(logParseRecMPIS.operationTarget);
        logParseRec.clientRequest.append(" HTTP/1.0");
        logParseRec.httpStatusCode = logParseRecMPIS.serviceStatusCode;
        logParseRec.bytesTransferred = logParseRecMPIS.bytesSent;
        logParseRec.referringURL.setLength(0);
        logParseRec.userAgent.setLength(0);
        logParseRec.bExtraText = logParseRecMPIS.bExtraText;
        // All OK
        return true;
    } catch (IndexOutOfBoundsException x) {
        // String.substring()/charAt() signal a too-short line with
        // StringIndexOutOfBoundsException, so the common supertype is caught
        throw new ParseLogException("unexpected end of line");
    }
}
/*
Copies the text from scanStr[scanIdx] up to the next comma into destBuf.
The comma is searched for after the first character; when there is none
the text runs to the end of the line. Spaces inside or at the end of the
text are kept.
On exit scanIdx is behind the comma and behind one space directly
following it, if any (or past end of line).
*/
private void getWordsTillComma(StringBuffer destBuf) throws ParseLogException {
    final String line = scanStr;
    // locate the delimiting comma
    final int comma = line.indexOf(',', scanIdx + 1);
    final int stop = (comma < 0) ? line.length() : comma;
    // replace the buffer contents with the field text
    destBuf.setLength(0);
    destBuf.append(line.substring(scanIdx, stop));
    // step over the comma and a single separating space
    int next = stop + 1;
    if (next < line.length() && line.charAt(next) == ' ') {
        next++;
    }
    scanIdx = next;
}
/*
Parses the next comma-delimited field of the current (NT format) line
into destBuf. This simply delegates to getWordsTillComma(), so the same
entry and exit conditions for scanIdx apply.
*/
private void getNextWordMPIS(StringBuffer destBuf) throws ParseLogException {
getWordsTillComma(destBuf);
}
/*
Reads the next comma-delimited field of the current (NT format) line and
converts it to an int. A lone "-" stands for "no value" and yields 0.
scanIdx is advanced as described for getWordsTillComma().
Throws ParseLogException ("invalid number format") for any other
non-numeric field.
*/
private int getNextIntMPIS() throws ParseLogException {
    getNextWordMPIS(workBuf);
    final String text = workBuf.toString();
    // a single dash is how the log writes a missing number
    if ("-".equals(text)) {
        return 0;
    }
    try {
        return Integer.parseInt(text);
    } catch (NumberFormatException x) {
        throw new ParseLogException("invalid number format", x);
    }
}
/*
Parses an NT format timestamp, which occupies two consecutive fields of the
current line (like: 4/2/98, 18:20:19, ).
The date field is converted with WLAUtil.string2Date() and the time field
with WLAUtil.string2TimeDelta(); the result is the date's millisecond value
plus that delta.
On entry scanStr[scanIdx] always should be the first char of the date field.
On exit scanIdx is behind the time field, as left by getWordsTillComma().
Throws ParseLogException, quoting the offending text, when either field
cannot be converted.
*/
private long getNextTimestampMPIS() throws ParseLogException {
    // first field: the calendar date
    Date datePart;
    try {
        getWordsTillComma(workBuf);
        datePart = WLAUtil.string2Date(workBuf.toString());
    } catch (java.text.ParseException x) {
        throw new ParseLogException("Invalid NT date timestamp format: \"" + workBuf + "\"", x);
    }
    // second field: the time of day, as an offset to add to the date
    long timeOfDay;
    try {
        getWordsTillComma(workBuf);
        timeOfDay = WLAUtil.string2TimeDelta(workBuf.toString());
    } catch (java.text.ParseException x) {
        throw new ParseLogException("Invalid NT time timestamp format: \"" + workBuf + "\"", x);
    }
    return datePart.getTime() + timeOfDay;
}
}
//{{DECLARE_CONTROLS
//}}
}