home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
OS/2 Shareware BBS: Java
/
Java.zip
/
jload18.zip
/
loader.java
< prev
next >
Wrap
Text File
|
2000-04-24
|
21KB
|
831 lines
import java.util.*;
import java.io.*;
import java.net.*;
public final class loader implements Runnable
{
// Priority given by init() to the initial requests of every active location.
private final static float MAX_QPRIO=9999.99f;
// Size in bytes of the buffered streams and of the copy buffer used by saver().
public static final int IO_BUFFER=4096;
public static final String VERSION="0.18";
public static final String NAME="Smart Cache Loader";
public static final String COPYRIGHT="Copyright (c) Radim Kolar 1998-2000. Open source software; There is NO warranty.\n"+
"See the GNU General Public Licence version 2 or later for copying conditions.";
// Default number of worker threads, used when no valid thread count is configured.
public static final int THREADS=4; // like in Netscape
// Leading characters that give a command line argument (or include-file line) a special meaning.
public static final char EXPANDCHAR='@'; // include this file as argz
// NOTE(review): init() tests for this prefix but the branch is empty in the visible code.
public static final char STARTURLCHAR='^'; // start URL for known location
public static final char CONFIGCHAR='#'; // also comment in include file
public static final char OPTIONCHAR='-'; // command line option
public static final char VISITEDCHAR=':'; // already visited
// NOTE(review): not referenced anywhere in the visible code.
public static final char DEFAULTURLCHAR='%'; // configure this url as default
// Config file loaded by main() when the first argument does not name one.
public static final String DEFAULTCFG="loader.cnf";
// A failed request is re-queued while its retry counter is below this value (see run()).
public static byte maxretry=3;
// Queue priority used when a failed request is re-queued.
public static float retryprio=-1f;
// Known locations: loaded from the config file, extended by URLs given on the command line.
public static location loc[];
// Default location from the config file; command line options are applied to it.
public static location def;
// When non-null, requests are sent to this proxy unless the request is marked ACT_NOPROXY.
public static InetAddress proxyserver;
public static int proxyport;
// Copied from store.isReadOnly() by init(); when true run() never opens a file for saving.
public static boolean readonly;
// Local store obtained from the config file; run() asks it for the local copy of each URL.
public static localstore store;
// Queue of pending requests; also the monitor guarding 'now' and used for wait/notify.
public static priorityqueue pq;
// URLs already queued or processed; insertFile() also records included file names here.
private static Hashtable visited;
// thread control
// Maximum number of worker threads start() keeps running at once.
public static short maxThreads;
// Number of workers currently running; changed only while holding the pq monitor.
public static volatile int now;
// worker non-static data
// The request this worker instance processes.
public request r;
// Connection to the server or proxy, opened by sendHTTPrequest().
Socket s;
/**
 * Program entry point: prints the banner, loads the configuration, interprets
 * the command line and runs the download loop until the queue is drained.
 *
 * The default config file is loaded here unless the first argument itself
 * names a config file (leading CONFIGCHAR); in that case init() loads it.
 *
 * @param argv command line arguments, see init()
 * @throws IOException propagated from closing the local store
 */
public static void main(String argv[]) throws IOException
{
    System.err.println(NAME+" "+VERSION+"\n"+COPYRIGHT+"\n");
    // An empty first argument must not be probed with charAt(0); init() skips
    // empty arguments too, so treat it like "no config file given".
    boolean explicitConfig=argv.length>0
        && argv[0].length()>0
        && argv[0].charAt(0)==CONFIGCHAR;
    if(!explicitConfig)
        configloader(DEFAULTCFG);
    init(argv);
    try
    {
        // peek() throws NoSuchElementException when nothing was queued
        pq.peek();
        start();
        store.close();
        System.err.println(NAME+" "+VERSION+" - end of job.");
    }
    catch (NoSuchElementException nse)
    {
        System.err.println("\n[LOADER] No active servers - nothing done.");
    }
}
/**
 * Creates a worker for one request; run() processes it.
 *
 * @param r the request this worker will process
 */
public loader(request r)
{
    this.r = r;
}
/**
 * Initialises the shared loader state and interprets the command line.
 *
 * Pass 1 expands include files ("@file", ":@file") into a flat argument list.
 * Pass 2 interprets every argument:
 *   "-..."        reported as unsupported,
 *   "#file"       loads that config file,
 *   ":url"        marks the URL as already visited,
 *   "...://..."   injects a new active location with that start URL,
 *   "name=value"  applies an option (see applyOption),
 *   anything else activates the location of that name ("!name" makes it passive).
 * Finally every active location gets its start URLs (or its base URL) queued
 * with MAX_QPRIO.
 *
 * @param argv raw command line arguments
 */
public static final void init(String argv[])
{
    // program init
    pq=new priorityqueue(5,10);
    visited=new Hashtable();
    // maxThreads is deliberately NOT reset here: main() may already have
    // loaded it from the default config file. It is validated below.
    now=0;

    // Pass 1: expand arguments
    Vector argz=new Vector(argv.length,5);
    for(int i=0;i<argv.length;i++)
    {
        String s=argv[i];
        if(s.length()==0) continue;
        if(s.charAt(0)==EXPANDCHAR)
        {
            insertFile(s.substring(1),argz,false);
        }
        else if(s.charAt(0)==VISITEDCHAR)
        {
            if(s.length()<2) continue;      // lone ':' carries no URL
            if(s.charAt(1)==EXPANDCHAR)
                insertFile(s.substring(2),argz,true);
            else
                argz.addElement(s);         // ":url" - keep it for pass 2 (used to be dropped)
        }
        else
        {
            argz.addElement(s);             // normal argument
        }
    }

    // Pass 2: interpret arguments
    argscan:for(int i=0;i<argz.size();i++)
    {
        String s=(String)argz.elementAt(i);
        if(s==null || s.length()==0) continue;

        /* special chars stuff */
        if(s.charAt(0)==OPTIONCHAR)
        {
            System.err.println("[PARAMETER_ERROR] Options are not YET supported.");
            continue; // TODO options are not YET supported
        }
        if(s.charAt(0)==CONFIGCHAR)
        {
            if(i!=0) System.err.println("[PARAMETER_ERROR] Config file must be the first parameter. All previous parameters are overwritten by it.");
            configloader(s.substring(1));
            continue;
        }
        if(s.charAt(0)==VISITEDCHAR)
        {
            String z=s.substring(1);
            try
            {
                new URL(z);                 // validation only
                visited.put(z,z);
            }
            catch (MalformedURLException ignored)
            {
                // not a URL: nothing to mark as visited
            }
            continue;
        }

        /* URL on commandline ? */
        if(s.indexOf("://")>0)
        {
            // NOTE(review): a leading STARTURLCHAR gets no special treatment;
            // the original branch testing for it was empty.
            location site;
            if(s.endsWith("/"))
            {
                site=createNewLocation(s);
            }
            else
            {
                try
                {
                    /* strip filename from location */
                    URL u=new URL(s);
                    site=createNewLocation(u.getProtocol()+"://"+u.getHost()+util.getDirname(u.getFile()));
                }
                catch (MalformedURLException grrr)
                {
                    System.err.println("[PARAMETER_ERROR] Malformed URL '"+s+"' ignored.");
                    continue;
                }
            }
            site.passive=false;
            site.masks=site.content=site.action=location.INCLUDING_DEFAULTS;
            // add pending DNS aliases
            site.transferAliases(def);
            // start URLs: this one plus any pending on the default location
            site.addStartURL(s);
            site.transferStartURL(def);
            loc=util.addLocationToArray(site,loc);
            continue; // new site injected
        }

        /* option on commandline ? */
        if(s.indexOf("=",0)>-1)
        {
            applyOption(s);
            continue;
        }

        // otherwise the argument is a location name, "!name" makes it passive
        boolean neg;
        if(s.charAt(0)=='!') { neg=true; s=s.substring(1); } else neg=false;
        for(int j=loc.length-1;j>=0;j--)
        {
            if(s.equalsIgnoreCase(loc[j].name)) { loc[j].passive=neg; continue argscan; }
        }
        System.err.println("[PARAMETER_ERROR] Location named '"+s+"' was not found.");
    } /* argscan loop */

    // start() waits forever when maxThreads is 0, so fall back to the default
    if(maxThreads<=0) maxThreads=THREADS;
    readonly=store.isReadOnly();

    // add ACTIVE sites to QUEUE
    for(int j=loc.length-1;j>=0;j--)
    {
        if(loc[j].passive) continue;
        if(loc[j].starturl==null)
            addToQueue(new request(loc[j].locbase,loc[j]),MAX_QPRIO);
        else
            for(int z=loc[j].starturl.length-1;z>=0;z--)
                addToQueue(new request(loc[j].starturl[z],loc[j]),MAX_QPRIO);
    }
}

/**
 * Applies one "name=value" command line option. The value is everything after
 * the FIRST '=' so that values containing '=' (e.g. a start URL with a query
 * string) are not truncated. Malformed or missing values are reported as a
 * parameter error instead of terminating the program.
 *
 * @param s the complete argument, known to contain '='
 */
private static void applyOption(String s)
{
    int eq=s.indexOf('=');
    String opt=s.substring(0,eq).toLowerCase().trim();
    String value=s.substring(eq+1).trim();
    try
    {
        if(opt.equals("scandepth") || opt.equals("depth"))
        {
            def.setDepth((short)Integer.parseInt(value));
        }
        else if(opt.equals("threads"))
        {
            maxThreads=(short)Integer.parseInt(value);
            if(maxThreads<=0) maxThreads=THREADS;
        }
        else if(opt.equals("retry"))
        {
            maxretry=(byte)Integer.parseInt(value);
        }
        else if(opt.equals("retrypriority"))
        {
            retryprio=Float.valueOf(value).floatValue();
        }
        else if(opt.equals("options"))
        {
            def.serveroptions(new options(s));
        }
        else if(opt.equals("priority"))
        {
            def.setPriority(Float.valueOf(value).floatValue());
        }
        else if(opt.equals("locationalias") || opt.equals("alias"))
        {
            // list of aliases separated by blank, ',' or '&'
            StringTokenizer st=new StringTokenizer(value," ,&\t\r\n");
            while(st.hasMoreTokens())
                def.addAlias(st.nextToken());
        }
        else if(opt.equals("starturl"))
        {
            if(value.length()==0)
                System.err.println("[PARAMETER_ERROR] Missing value for option "+opt);
            else
                def.addStartURL(value);
        }
        else if(opt.equals("log") || opt.equals("upd"))
        {
            // NOTE(review): "log" and "upd" were handled identically in the original
            def.addActions(new options(s),true);
        }
        else
        {
            System.err.println("[PARAMETER_ERROR] Unknown option "+opt);
        }
    }
    catch (NumberFormatException bad)
    {
        System.err.println("[PARAMETER_ERROR] Invalid value '"+value+"' for option "+opt);
    }
}
/**
 * Reads an include file and appends its entries to the argument list.
 *
 * Blank lines and lines starting with CONFIGCHAR are skipped. A line starting
 * with VISITEDCHAR is added with the visited marker; a line starting with
 * EXPANDCHAR names a nested include file, which is read once only (its name is
 * remembered in the visited table). An unreadable file is reported and
 * otherwise ignored.
 *
 * @param filename include file to read
 * @param v        argument list the entries are appended to
 * @param vis      true when every entry of this file counts as already visited
 */
private final static void insertFile(String filename,Vector v,boolean vis)
{
    BufferedReader br=null;
    try
    {
        br=new BufferedReader(new FileReader(filename));
        String l;
        while((l=br.readLine())!=null)
        {
            boolean vv=vis;
            // trim BEFORE the emptiness test: a whitespace-only line must not reach charAt(0)
            l=l.trim();
            if(l.length()==0) continue;
            if(l.charAt(0)==CONFIGCHAR) continue;   // comment
            if(l.charAt(0)==VISITEDCHAR)
            {
                l=l.substring(1).trim();
                if(l.length()==0) continue;         // missing URL
                vv=true;
            }
            if(l.charAt(0)==EXPANDCHAR)
            {
                String fn=l.substring(1);
                if(!visited.containsKey(fn))
                {
                    visited.put(fn,fn);             // guards against include cycles
                    insertFile(fn,v,vv);
                }
                continue;
            }
            if(vv) v.addElement(VISITEDCHAR+l);
            else v.addElement(l);
        }
    }
    catch (IOException z)
    {
        System.err.println("[PARAMETER_ERROR] Error reading include file "+filename);
    }
    finally
    {
        if(br!=null)
        {
            try { br.close(); }
            catch (IOException ignored) { /* nothing useful can be done */ }
        }
    }
}
/* m a i n l o o p */
/**
 * Worker body: processes the single request r.
 *
 * 1. Decides whether the document is taken from the local store or fetched
 *    over HTTP (directly or through the proxy).
 * 2. When fetching, parses the status line and headers, queues a request for
 *    a Location: header, and opens the local file for saving unless saving is
 *    disabled or the store is read-only.
 * 3. Either just copies the data (no parsing wanted, or depth is -1) or runs
 *    the HTML scanner, collects the referenced URLs, normalises them and
 *    hands every not yet visited one to the location's processURL().
 *
 * Any exception re-queues the request while its retry counter is below
 * maxretry. The worker slot is always released: early exits call done(),
 * the normal and the exception path release it at the end.
 */
public final void run()
{
    try
    {
        DataInputStream dis;
        DataOutputStream dos=null;
        URL target=new URL(r.url);
        String line;
        localurl local;
        if(r.log==mask.LOG_SERVERDEFAULT)
            r.log=(r.loc.defaultmask.log==mask.LOG_SERVERDEFAULT?
                   mask.LOG_DEFAULT:r.loc.defaultmask.log);
        log("Processing",mask.LOG_QUEUE);
        local=store.getURL(r.url);
        // decide whether to load from the server or from the local filesystem
        if(
            /* norefresh/noreparse test */
            ((r.update==mask.UPD_NOREFRESH || r.update==mask.UPD_NOREPARSE) && local.exists()) ||
            /* update/forceupdate test */
            ((r.update==mask.UPD_UPDATE || r.update==mask.UPD_FORCEUPDATE) &&
             (local.exists() && local.getDate()+r.updatelimit<System.currentTimeMillis())
            )
          )
        {
            try
            {
                /* Loading from local filesystem */
                log("Stored",mask.LOG_STORED);
                if(local.getLocation()!=null)
                {
                    // stored redirect: follow it
                    request nr=(request)r.clone();
                    nr.url=new URL(target,local.getLocation()).toString();
                    addToQueue(nr,r.loc.priority);
                }
                if(r.update==mask.UPD_NOREPARSE || !local.isParseable())
                { done(); return; } // no need to load it
                dis=new DataInputStream(
                        new BufferedInputStream(local.getInputStream(),IO_BUFFER));
            }
            catch (IOException iof)
            {
                System.err.println("Reading from localfile failed, turning update off.");
                r.update=mask.UPD_LOAD;
                throw iof;
            }
        }
        else
        {
            log("Loading",mask.LOG_LOAD);
            // connect to TCP/IP data source
            if(proxyserver==null||r.act==mask.ACT_NOPROXY)
            {
                // Direct connection to remote server
                String proto=target.getProtocol();
                if(!proto.equalsIgnoreCase("http"))
                {
                    log("Unsupported protocol",mask.LOG_FATALERR);
                    done();
                    return;
                }
                int p=target.getPort();
                sendHTTPrequest(InetAddress.getByName(target.getHost()),p==-1? 80: p,target.getFile());
            }
            else
            {
                // Send request to proxy
                sendHTTPrequest(proxyserver,proxyport,r.url);
            }
            if(r.act==mask.ACT_FASTCLOSE) { s.close(); done(); return; }
            // open the data input stream from the HTTP server
            dis=new DataInputStream(new BufferedInputStream(s.getInputStream(),IO_BUFFER));

            /* HTTP-HEADER PARSING START */
            int httprc;
            line=dis.readLine(); /* HTTP/1.0 XX OK */
            // readLine() returns null when the peer closed the connection
            // without sending anything; report it as an I/O failure (retried).
            if(line==null)
                throw new EOFException("connection closed before HTTP status line");
            /* read the HTTP result code */
            StringTokenizer st=new StringTokenizer(line);
            try
            {
                st.nextToken(); /* http/1.0 - not interesting */
                httprc=Integer.valueOf(st.nextToken()).intValue();
            }
            catch (Exception http09)
            {
                // no parseable status line: treated as an HTTP 0.9 server
                log("HTTP 0.9 response",mask.LOG_FATALERR);
                s.close(); done(); return;
            }
            /* read the headers */
            while(true)
            {
                line=dis.readLine();
                if(line==null) break;
                if(line.length()==0) break;
                int j=line.indexOf(':',0);
                if(j==-1) continue;
                String s1=line.substring(0,j).toLowerCase();
                String s2=line.substring(j+1).trim();
                // Content-Length is recognised but its value is not used
                if(s1.equals("content-length")) continue;
                if(s1.equals("content-type") && !s2.toLowerCase().startsWith("text/html"))
                {
                    if(r.act==mask.ACT_CLOSE) { s.close(); done(); return; }
                    else
                        r.act=mask.ACT_NOPARSE;
                }
                if(s1.equals("location"))
                {
                    /* Location: handler */
                    request nr=(request)r.clone();
                    nr.url=new URL(target,s2).toString();
                    addToQueue(nr,r.loc.priority);
                    continue;
                }
            } /* headers */
            if(httprc!=200)
            {
                s.close();
                done();
                log("Error "+httprc,mask.LOG_ERR);
                return;
            }
            /* ***** SAVE as ******** */
            if(r.act!=mask.ACT_NOSAVE && readonly==false)
            {
                try
                {
                    dos=new DataOutputStream(new BufferedOutputStream(local.getOutputStream(),IO_BUFFER));
                    log("Saving",mask.LOG_SAVE);
                }
                catch (IOException iof)
                {
                    // keep going without saving
                    log("Save error",mask.LOG_ERR);
                    dos=null;
                }
            } /* open file for saving */
        } /* end of local/remote decision */

        // data processing can start now
        if(r.act==mask.ACT_NOPARSE || r.depth==-1)
        {
            // just save it and finish
            saver(dis,dos);
            done();
            return;
        }

        /* **** P A R S E  E N G I N E **** */
        /* (hacked from watchit) */
        htmlscanner hscan=new htmlscanner(dis,dos);
        log("Parsing",mask.LOG_PARSE);
        Vector urls=new Vector();   // URLs found in the document
        Vector srcs=new Vector();   // tag name each URL came from, same index as urls
        boolean anyframe=false;
        while(true)
        {
            Hashtable x;
            String attr;
            try
            {
                x=hscan.getElement();
                if(x==null) break;          // EOF?
                line=(String)x.get("");     // tag name is stored under the empty key
                if(line==null) continue;    // null tag?
                if(line.equals("FRAME")) anyframe=true;
                /* META - REFRESH HANDLER */
                if(line.equals("META"))
                {
                    attr=(String)x.get("HTTP-EQUIV");
                    if(attr==null) continue;
                    attr=attr.trim();
                    if(!attr.equalsIgnoreCase("Refresh")) continue;
                    attr=(String)x.get("CONTENT");
                    if(attr==null) continue;
                    attr=attr.trim();
                    int j=attr.indexOf(';');
                    if(j==-1) continue;
                    try
                    {
                        j=Integer.valueOf(attr.substring(0,j)).intValue();
                    }
                    catch (NumberFormatException z)
                    {
                        continue;
                    }
                    /* refresh delay of more than 45 seconds - ignore it */
                    if(j>45) continue;
                    j=attr.indexOf('=');
                    if(j==-1) continue;
                    attr=attr.substring(j+1).trim();
                    URL url2;
                    try
                    {
                        url2=new URL(target,attr);
                    }
                    catch (MalformedURLException e)
                    { continue; }
                    urls.addElement(url2);
                    srcs.addElement("REFRESH");
                    anyframe=true;
                    continue;
                } /* META html redirect */
                else if(line.equals("BODY"))
                {
                    attr=(String)x.get("BACKGROUND");
                    if(attr==null) continue;
                    line="IMG"; /* CHECK: cheat it as IMG ?! */
                }
                else
                {
                    /* SRC and HREF generic handler */
                    attr=(String)x.get("SRC");
                    if(attr==null)
                    {
                        attr=(String)x.get("HREF");
                        if(attr==null) continue;
                    }
                }
                URL url2;
                try
                {
                    url2=new URL(target,attr);
                }
                catch (MalformedURLException e)
                { continue; }
                urls.addElement(url2);
                srcs.addElement(line);
            }
            catch (EOFException e) { break; }
        }
        hscan.close();
        /* HTML parsing done */
        if(anyframe==true) r.depth++;

        /* Post-process the collected URLs:
             drop mailto: links,
             resolve this location's DNS aliases,
             cut off "#fragment" and anything after CR, LF or space,
             replace the URL objects in the Vector by Strings. */
        for(int i=urls.size()-1;i>=0;i--)
        {
            line=urls.elementAt(i).toString();
            if(line.startsWith("mailto"))
            { urls.removeElementAt(i); srcs.removeElementAt(i); continue; }
            if(r.loc.aliases!=null)
            {
                // unaliasing: the first matching alias prefix is replaced by the location base
                for(int x=r.loc.aliases.length-1;x>=0;x--)
                    if(line.startsWith(r.loc.aliases[x]))
                    {
                        line=r.loc.locbase+line.substring(r.loc.aliases[x].length());
                        break;
                    }
            } /* dealiasing */
            // Compare full chars: the deprecated byte conversion used before
            // kept only the low 8 bits and so mistook e.g. U+0123 for '#'.
            int ln=line.length();
            for(int z=0;z<ln;z++)
            {
                char c=line.charAt(z);
                if(c=='#' || c=='\r' || c=='\n' || c==' ')
                {
                    line=line.substring(0,z);
                    break;
                }
            }
            urls.setElementAt(line,i);
        }

        // process the URLs
        for(int i=urls.size()-1;i>=0;i--)
        {
            String mybase=target.getProtocol()+"://"+target.getHost();
            String mydir=util.getDirname(target.getFile());
            line=(String)urls.elementAt(i);
            if(visited.get(line)!=null) continue; // already visited
            visited.put(line,line);
            r.loc.processURL(mybase,mydir,r.depth,r.depthset,line,(String)srcs.elementAt(i));
        }
    }
    catch (Exception failure)
    {
        // This worker is finished with its connection whatever happens next;
        // without this the socket stayed open after every failed request.
        if(s!=null)
        {
            try { s.close(); }
            catch (IOException alreadyBroken) { /* nothing more to do */ }
        }
        System.err.print("Loader got "+failure+" when loading "+r.url);
        if(!(failure instanceof java.io.IOException)) failure.printStackTrace();
        if(r.retry++<maxretry)
        {
            System.err.println(", re-inserting to queue for retry");
            pq.push(r,retryprio);
        }
        else
            System.err.println("");
    }
    // release the worker slot (early exits above have already called done() and returned)
    done();
}
/**
 * Opens a connection (stored in field s) and sends an HTTP/1.0 GET request.
 * "Pragma: no-cache" is added when the request's update mode is UPD_RELOAD or
 * UPD_FORCEUPDATE.
 *
 * @param adr     host to connect to (origin server or proxy)
 * @param port    TCP port to connect to
 * @param request what to GET: a path for a direct connection, a full URL for a proxy
 * @throws IOException when connecting or sending fails; the socket is closed in that case
 */
private final void sendHTTPrequest(InetAddress adr,int port,String request) throws IOException
{
    // URL.getFile() is "" for a URL like "http://host"; an empty request
    // target would produce the malformed line "GET  HTTP/1.0".
    if(request.length()==0) request="/";
    s=new Socket(adr,port);
    try
    {
        // open the data output stream
        DataOutputStream dos=new DataOutputStream(new BufferedOutputStream(s.getOutputStream(),1024));
        // build and send the request
        StringBuffer sb=new StringBuffer(1024);
        sb.append("GET ");
        sb.append(request);
        sb.append(" HTTP/1.0\r\nAccept: */*\r\nUser-Agent: Mozilla/3.01 (Java Virtual Machine; "+NAME+" "+VERSION+")\r\n");
        if(r.update==mask.UPD_RELOAD|| r.update==mask.UPD_FORCEUPDATE)
            sb.append("Pragma: no-cache\r\n");
        sb.append("\r\n");
        dos.writeBytes(sb.toString());
        dos.flush();
    }
    catch (IOException failed)
    {
        // do not leave a half-used connection behind
        try { s.close(); }
        catch (IOException ignored) { /* the original failure is the one to report */ }
        throw failed;
    }
}
/**
 * Gives back one worker slot: decrements the running-worker counter and
 * notifies one thread waiting on the queue monitor.
 */
private final static void done()
{
    synchronized(pq)
    {
        now -= 1;
        pq.notify();
    }
}
/**
 * Queues a request unless it is already in the queue or its URL has been
 * seen before; a queued URL is recorded as visited.
 *
 * @param r    request to queue
 * @param prio queue priority for the request
 */
private final static void addToQueue(request r,float prio)
{
    if(pq.search(r)!=-1)
        return;                     // already waiting in the queue
    if(visited.get(r.url)!=null)
        return;                     // URL seen before
    visited.put(r.url,r.url);
    pq.push(r,prio);
}
/**
 * Dispatcher loop. While holding the queue monitor it starts a worker thread
 * for every queued request as long as fewer than maxThreads workers are
 * running, and waits on the monitor when all slots are busy or the queue is
 * empty while workers are still running. Returns once the queue is empty and
 * no worker is running.
 */
public final static void start()
{
    Thread.currentThread().setPriority(Thread.MAX_PRIORITY-2);
    synchronized(pq)
    {
        while(true)
        {
            if(now==maxThreads)
            {
                // every slot is taken: sleep until a worker signals
                try
                {
                    pq.wait();
                }
                catch (InterruptedException leaveUsAlonePlease) {}
                continue;
            }

            // a slot is free: fetch the next request
            request next;
            try
            {
                next=(request)pq.pop();
            }
            catch (NoSuchElementException queueEmpty)
            {
                if(now<=0) break;   /* no runners anymore */
                // queue is empty but workers are still running and may add requests
                try
                {
                    pq.wait();
                }
                catch (InterruptedException leaveUsAlonePlease) {}
                continue;
            }

            if(next.act==mask.ACT_REJECT) continue; // ignore it

            // run the new request
            Thread worker=new Thread(new loader(next));
            worker.setPriority(Thread.NORM_PRIORITY);
            worker.start();
            now++;
        }
    }
}
/**
 * Copies everything from sin to out (when out is not null; otherwise the data
 * is read and discarded) and closes both streams.
 *
 * The streams are closed on failure as well, so a broken transfer does not
 * keep the connection or the file open.
 * NOTE(review): on a failed transfer out is closed too, which flushes the
 * partial data to the store - confirm the store tolerates that.
 *
 * @param sin source of the data
 * @param out destination, may be null
 * @throws IOException when reading, writing or closing fails
 */
private final static void saver(DataInputStream sin,DataOutputStream out) throws IOException
{
    byte b[]=new byte[IO_BUFFER];   // one buffer for the whole transfer
    try
    {
        int rb;
        while((rb=sin.read(b))!=-1)     /* -1: end of data */
        {
            if(out!=null) out.write(b,0,rb);
        }
    }
    finally
    {
        try
        {
            if(out!=null) out.close();
        }
        finally
        {
            sin.close();    // runs even when closing out failed
        }
    }
}
/**
 * Prints a progress message for this worker's request to standard output when
 * the request's log mask selects the given category.
 *
 * @param what text describing the event
 * @param msk  category bit of the event
 */
private final void log(String what,short msk)
{
    if((this.r.log&msk)<=0)
        return;                         // category not selected
    boolean urlOnly=(this.r.log&mask.LOG_URLONLY)>0;
    if(urlOnly)
    {
        System.out.println(r.url);
    }
    else
    {
        System.out.println(what+": "+r.url);
    }
}
/**
 * Reads a config file and copies its settings into the loader's static
 * state. When the file cannot be read an error is printed, the location
 * list becomes empty and all other settings are left as they were.
 *
 * @param cfgfile name of the config file
 */
private final static void configloader(String cfgfile)
{
    configloader parsed;
    try
    {
        parsed=new configloader(cfgfile);
    }
    catch (IOException unreadable)
    {
        System.out.println("[CONFIG_ERROR] Error reading config file "+cfgfile);
        loc=new location[0];
        return;
    }

    /* locations */
    loc=parsed.getLocations();
    def=parsed.getDefaultLocation();

    /* threads and retry policy */
    maxThreads=parsed.getThreads();
    maxretry=parsed.getMaxretry();
    retryprio=parsed.getRetryPriority();

    /* proxy */
    proxyserver=parsed.getProxyServer();
    proxyport=parsed.getProxyPort();

    /* local store */
    store=parsed.getLocalStore();
}
/**
 * Builds a location for a base URL. The first configured location whose base
 * is a prefix of the URL serves as template (its aliases are taken over);
 * when none matches, the default location is the template.
 *
 * @param baseurl base URL of the new location
 * @return the new location
 */
private final static location createNewLocation(String baseurl)
{
    for(int i=0;i<loc.length;i++)
    {
        location known=loc[i];
        if(!baseurl.startsWith(known.locbase))
            continue;
        System.out.println("Location "+baseurl+" configured as "+known.locbase);
        location derived=new location(baseurl,known);
        derived.aliases=known.aliases;
        return derived;
    }
    // nothing configured for this URL
    return new location(baseurl,def);
}
}