home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Chip 1997 October
/
Chip_1997-10_cd.bin
/
tema
/
sybase
/
powerj
/
samples.z
/
PageSeeker.wxc
< prev
next >
Wrap
Text File
|
1997-01-28
|
6KB
|
253 lines
Save Format v2.0(1)
@begin ClassFile "PageSeeker"
Exported 1;
Abstract 0;
Interface 0;
PackageName "";
Language "Java";
@begin UserFunction "run()"
GencodeSrcLine 13;
FunctionName "PageSeeker::run()";
@end;
@begin UserFunction "Prototype for run()"
Private 1;
GencodeSrcLine -1;
FunctionName "PageSeeker::Prototype for run()";
@end;
@begin UserFunction "loadPage(URL pageURL)"
GencodeSrcLine 60;
FunctionName "PageSeeker::loadPage(URL pageURL)";
@end;
@begin UserFunction "Prototype for loadPage(URL pageURL)"
Private 1;
GencodeSrcLine -1;
FunctionName "PageSeeker::Prototype for loadPage(URL pageURL)";
@end;
@begin UserFunction "extractLinks(String webPage)"
GencodeSrcLine 74;
FunctionName "PageSeeker::extractLinks(String webPage)";
@end;
@begin UserFunction "Prototype for extractLinks(String webPage)"
Private 1;
GencodeSrcLine -1;
FunctionName "PageSeeker::Prototype for extractLinks(String webPage)";
@end;
@begin UserFunction "PageSeeker(String pageURL)"
GencodeSrcLine 114;
FunctionName "PageSeeker::PageSeeker(String pageURL)";
@end;
@begin UserFunction "Prototype for PageSeeker(String pageURL)"
Private 1;
GencodeSrcLine -1;
FunctionName "PageSeeker::Prototype for PageSeeker(String pageURL)";
@end;
@begin HPPPrefixBlock
@begin-code HPPPrefix
// add your custom import statements here
import java.util.*;
import java.net.*;
import java.io.*;
@end-code;
GencodeSrcLine 6;
@end;
@begin ClassContentsBlock
@begin-code ClassContents
// add your data members here
// upper bound on the number of PageSeeker threads fetching pages at once
private final static int MAXTHREADS = 4;
// URLs already visited, shared by every PageSeeker thread;
// Hashtable is synchronized, so concurrent containsKey/put are safe
private static Hashtable _linkTable = new Hashtable();
// the single URL this thread will download and scan for links
URL _pageToFetch;
// counting-semaphore-style limiter shared by all threads; TokenIssuer is
// project-local -- presumably getToken() blocks until a token is free (confirm)
static TokenIssuer _threadLimiter = new TokenIssuer(MAXTHREADS);
// owning crawler; notified of each newly discovered link via addLink()
WebCrawler _parent;
@end-code;
GencodeSrcLine 128;
@end;
@begin-code BaseClassList
extends Thread
@end-code;
@begin-code GeneratedClassContents
@end-code;
@begin-code Code "PageSeeker::run()"
//****************************
/**
 * Creates a @CLASSNAME@ object which performs the following:
 * 1. loads a page from a given URL
 * 2. parses out all the imbedded links
 * 3. checks links against a hashtable and adds them if they
 * are new
 * 4. spawns new @CLASSNAME@ threads to visit each new link
 */
public void run()
//****************************
{
    this.setPriority(MIN_PRIORITY);
    String webPage = "";
    Vector pageLinks;
    // only run if there is a free token
    _threadLimiter.getToken();
    try {
        try {
            webPage = loadPage(_pageToFetch);
        } catch (java.io.IOException badIO) {
            // best effort: a failed fetch just yields an empty page
            System.out.println(_pageToFetch + " returned a bad I/O");
        }
        pageLinks = extractLinks(webPage);
        // FIX: local was originally named "enum", which became a reserved
        // word in Java 5 and no longer compiles
        Enumeration links = pageLinks.elements();
        while (links.hasMoreElements()) {
            String page = (String) links.nextElement();
            if (!_linkTable.containsKey(page)) {
                _linkTable.put(page, page);
                _parent.addLink(page);
                // constructor starts the new seeker thread itself
                new @CLASSNAME@(_parent, page);
            }
        }
    } finally {
        // FIX: always return the token, even if extractLinks() throws,
        // so waiting threads are never starved forever
        _threadLimiter.relinquishToken();
    }
    // wait some random time to give all waiting threads a chance
    try {
        Thread.sleep((int) (Math.random() * 200));
    } catch (Exception e) {}
}
@end-code;
@begin-code Code "PageSeeker::Prototype for run()"
public:
void run();
@end-code;
@begin-code Code "PageSeeker::loadPage(URL pageURL)"
/**
 * Downloads the document at pageURL and returns its contents as a String.
 * NOTE(review): bytes are cast directly to chars, so only single-byte
 * encodings (ASCII/Latin-1) round-trip correctly -- confirm acceptable.
 *
 * @param pageURL the page to fetch
 * @return the raw page text
 * @throws IOException if the connection or a read fails
 */
public String loadPage(URL pageURL) throws IOException
//****************************
{
    InputStream is = pageURL.openStream();
    StringBuffer sb = new StringBuffer();
    try {
        int oneChar;
        while ((oneChar = is.read()) != -1)
            sb.append((char) oneChar);
    } finally {
        // FIX: close even when read() throws, avoiding a stream leak
        is.close();
    }
    return sb.toString();
}
@end-code;
@begin-code Code "PageSeeker::Prototype for loadPage(URL pageURL)"
public:
String loadPage(URL pageURL);
@end-code;
@begin-code Code "PageSeeker::extractLinks(String webPage)"
/**
 * Scans webPage for absolute hypertext links (strings starting with
 * "http://" and ending at the next '>'), strips trailing quotes and
 * fragment references, discards .gif/.jpg image links, and returns the
 * rest as a Vector of Strings (duplicates are NOT removed here; the
 * caller de-duplicates against its hashtable).
 *
 * @param webPage raw page text, may be empty
 * @return a Vector of link Strings, possibly empty, never null
 */
public Vector extractLinks(String webPage)
//****************************
{
    int lastPosition = 0;
    int endOfURL;
    String link;
    Vector newLinks = new Vector();
    while (lastPosition != -1) {
        lastPosition = webPage.indexOf("http://", lastPosition);
        if (lastPosition != -1) {
            endOfURL = webPage.indexOf(">", lastPosition + 1);
            // FIX: a page whose last "http://" has no closing '>' made
            // substring(lastPosition, -1) throw StringIndexOutOfBoundsException
            if (endOfURL == -1) {
                break;  // malformed tail, stop scanning
            }
            // extract found hypertext link
            link = webPage.substring(lastPosition, endOfURL);
            link = link.trim();
            if (link.endsWith("\"")) {
                link = link.substring(0, link.length() - 1);
            }
            // ignore references
            if (link.indexOf("#") != -1) {
                link = link.substring(0, link.indexOf("#"));
            }
            // discard links which point explicitly to images
            if (link.endsWith(".gif") ||
                link.endsWith(".jpg")) {
                ;
            } else { // collect all others
                newLinks.addElement(link);
            }
            lastPosition++; // skip current link
        }
    }
    return newLinks;
}
@end-code;
@begin-code Code "PageSeeker::Prototype for extractLinks(String webPage)"
public:
Vector extractLinks(String webPage);
@end-code;
@begin-code Code "PageSeeker::PageSeeker(String pageURL)"
/**
 * Constructs a seeker for startingPage and immediately starts it as a
 * worker thread. A malformed URL is reported to stdout and the thread
 * is never started (best-effort, no exception propagates).
 *
 * @param parent       the crawler that collects discovered links
 * @param startingPage absolute URL of the page this thread will fetch
 */
public @CLASSNAME@(WebCrawler parent, String startingPage)
//****************************
{
_parent = parent;
try {
_pageToFetch = new URL(startingPage);
// name the thread after its URL so it shows up readably in debuggers
this.setName(startingPage);
// NOTE(review): starting a thread from its own constructor publishes
// "this" before construction completes; deliberate in this design, but
// fragile if the class is ever subclassed -- confirm before extending.
start();
} catch (MalformedURLException badURL) {
System.out.println(startingPage + " bad URL");
}
}
@end-code;
@begin-code Code "PageSeeker::Prototype for PageSeeker(String pageURL)"
public:
@@CLASSNAME@(String pageURL);
@end-code;
@end;