home *** CD-ROM | disk | FTP | other *** search
Java Source | 2000-04-16 | 19.6 KB | 628 lines |
- import java.util.*;
- import java.net.*;
-
- public final class mask {
-
- /* load flags */
- // act= (what to do with this object?)
- // reject,stop- don't download it (same as q=0.0)
- // noparse - don't parse it (same as depth=-1)
- // fastclose - close connection after sending request (will not be parsed even if parseable)
- // close - if object is unparseable (from content-type), close connection without
- // downloading more data
-
- public static final byte ACT_LOAD=0;
- public static final byte ACT_REJECT=1;
- public static final byte ACT_NOPARSE=2;
- public static final byte ACT_FASTCLOSE=3;
- public static final byte ACT_CLOSE=4;
- public static final byte ACT_NOSAVE=5;
- public static final byte ACT_NOPROXY=6;
-
- /* log flags */
- // log=
- // none - no loging done
- // queue - when element has been taken from queue
- // load - when trying to load
- // parse - when parsing
- // saving - when saving to disk
- // err - when loading error occurs
- // fatalerr - when fatal loading error occurs
- // urlonly - log URL only
-
- public static final short LOG_SERVERDEFAULT =-1;
- public static final short LOG_NONE =0;
- public static final short LOG_QUEUE =1;
- public static final short LOG_LOAD =2;
- public static final short LOG_PARSE =4;
- public static final short LOG_SAVE =8;
- public static final short LOG_ERR =16;
- public static final short LOG_FATALERR=32;
- public static final short LOG_IOERR =64;
- public static final short LOG_STORED =128;
- public static final short LOG_URLONLY =256; // MUST be last
- // aliases
- public static final short LOG_ALL=LOG_URLONLY-1;
- public static final short LOG_DEFAULT=LOG_LOAD|LOG_IOERR|LOG_ERR|LOG_FATALERR;
-
- /* update status flags */
- // norefresh - if object allready exists, don't try to load it
- // forcereload - force cache to reloading object
- // load,continue - load object in classic way (don't care about if old copy exists)
- // update - if object in cache is older than XXXX hours, start loading.
- // forceupdate - forced load if older
-
- public static final byte UPD_NOLIMIT=-1;
- public static final byte UPD_LOAD=0;
- public static final byte UPD_RELOAD=1;
- public static final byte UPD_UPDATE=2;
- public static final byte UPD_NOREFRESH=3;
- public static final byte UPD_FORCEUPDATE=4;
- public static final byte UPD_NOREPARSE=5;
-
- // strip=none,null - no URL striping before mask testing
- // =auto
- // =location,loc odstrani http://xxx/
- // =server odstrani http://xxx
-
- public static final byte STRIP_AUTO=0;
- public static final byte STRIP_LOCATION=1;
- public static final byte STRIP_SERVER=2;
- public static final byte STRIP_NONE=3;
- public static final byte STRIP_DIRECTORY=4;
-
-
-
- // size=xxxx bytes - only if object is bigger than xxxx bytes
- // size=known - only if we know size of object
- // size=unknown - only if we don't know size of object beeing downloaded
- // size=any - don't care about it
-
- public static final byte SIZE_NOLIMIT=-1;
- public static final byte SIZE_ANY=0;
- public static final byte SIZE_KNOWN=1;
- public static final byte SIZE_UNKNOWN=2;
- public static final byte SIZE_LIMITED=3;
-
- // target=any,anyserver - any wwwserver in the world (including my own)
- // alias for world,known,site,me
- // world - any undefined location
- // known - any known location (but not me or myserver)
- // server,site - alias for me,sameserver
- // same,sameserver - file located on the same server (not including me)
- // location, samelocation, me, this - located in Location URL
- // subdir - located in subdirectory
- //
-
- public static final byte TARGET_ANY=127;
- /* official subtypes */
- public static final byte TARGET_NONE=0;
- public static final byte TARGET_WORLD=1;
- public static final byte TARGET_KNOWN=2;
- public static final byte TARGET_SERVER=4;
- public static final byte TARGET_LOCATION=8;
- public static final byte TARGET_SUBDIR=16; // subdir in location
- public static final byte TARGET_DIRECTORY=32;
-
- /* aliases */
-
- public static final byte TARGET_SITE=TARGET_SERVER|
- TARGET_LOCATION|
- TARGET_SUBDIR|
- TARGET_DIRECTORY;
-
- public static final byte TARGET_ME =
- TARGET_LOCATION|
- TARGET_SUBDIR|
- TARGET_DIRECTORY;
-
- /* http - ANY highest */
- /* / - SITE */
- /* xxxx = location */
- /* * = cokoliv */
-
- /* target guess priority */
- public static final byte GUESS_TARGET_ANY=10;
- public static final byte GUESS_TARGET_SITE=8;
- public static final byte GUESS_TARGET_LOCATION=5;
- public static final byte GUESS_TARGET_ANYWHERE=0;
-
-
- public byte action,update,strip,size,target;
- public short log;
- public int sizelimit,updatelimit; // -1 is no limit
-
- public float q;
-
- public static final byte DEPTH_NOCHANGE=-2;
-
- public short depth;
-
- /* content section */
-
- private regexp contentmasks[]; /* regexp masks */
- private boolean contentok[]; /* true=normal, false=must NOT be matched */
- private boolean anycontent; /* true=OR, false=AND */
-
- /* extensions */
-
- private regexp extmasks[]; /* regexp masks */
- private boolean extok[]; /* true=normal, false=must NOT be matched */
- private boolean anyext; /* true=OR, false=AND */
-
- /* urlmasks */
- private regexp urlmasks[]; /* regexp masks */
- private boolean urlsok[]; /* true=normal, false=must NOT be matched */
- private boolean anyurl; /* true=OR, false=AND */
-
-
- /* src masks */
- private regexp srcmasks[]; /* regexp masks */
- private boolean srcok[]; /* true=normal, false=must NOT be matched */
- private boolean anysrc; /* true=OR, false=AND */
-
- private final void systemdefaults()
- {
- action=ACT_LOAD;
- update=UPD_LOAD;
- strip=STRIP_AUTO;
- target=TARGET_NONE;
- size=SIZE_ANY;
- log=LOG_NONE; // SERVERDEFAULT;
- sizelimit=SIZE_NOLIMIT;
- updatelimit=UPD_NOLIMIT;
- anycontent=anyext=anyurl=anysrc=true;
- q=1.0f;
- depth=DEPTH_NOCHANGE;
- }
-
- public mask(options o)
- {
- systemdefaults();
- if(o==null) throw new NullPointerException("mask");
- for(int i=o.parsed.size()-1;i>=0;i--)
- {
- try
- {
- String opt;
- StringTokenizer st;
- st=new StringTokenizer((String)o.parsed.elementAt(i));
- opt=st.nextToken();
- // System.out.println("line="+o.parsed.elementAt(i));
- whl:while(true)
- {
- String s;
- boolean ok;
- s=st.nextToken();
- ok=true;
- if(s.length()==1 && s.charAt(0)=='!')
- {
- ok=false;
- s=st.nextToken();
- }
-
- if(opt.equals("q")) { q=Float.valueOf(s).floatValue();break whl;}
- else if (opt.equals("url"))
- {
- if(s.equals("*") || s.equals("any")) {urlmasks=null;urlsok=null;}
- else
- {
- urlmasks=util.addRegexpToArray(s, urlmasks);
- urlsok=util.addBooleanToArray(ok,urlsok);
- }
- s=st.nextToken();
- if(s.equals(",")) anyurl=true; else anyurl=false;
- continue;
- }
- else if (opt.equals("content") || opt.equals("ct"))
- {
- if(s.equals("*") || s.equals("any")) {contentmasks=null;contentok=null;}
- else
- {
- contentmasks=util.addRegexpToArray(s, contentmasks);
- contentok=util.addBooleanToArray(ok,contentok);
- }
- s=st.nextToken();
- if(s.equals(",")) anycontent=true; else anycontent=false;
- continue;
- }
- else if (opt.equals("ext"))
- {
- if(s.equals("*") || s.equals("any")) {extmasks=null;extok=null;}
- else
- {
- extmasks=util.addRegexpToArray(s, extmasks);
- extok=util.addBooleanToArray(ok,extok);
- }
- s=st.nextToken();
- if(s.equals(",")) anyext=true; else anyext=false;
- continue;
- }
-
- else if (opt.equals("src"))
- {
- if(s.equals("*") || s.equals("any")) {srcmasks=null;srcok=null;}
- else
- {
- srcmasks=util.addRegexpToArray(s, srcmasks);
- srcok=util.addBooleanToArray(ok,srcok);
- }
- s=st.nextToken();
- if(s.equals(",")) anysrc=true; else anysrc=false;
- continue;
- }
-
- else if (opt.equals("depth"))
- {
- try
- {
- depth=(short)Integer.valueOf(s).intValue();
- }
- catch (NumberFormatException n)
- {
- System.err.println("[CONFIG_ERROR] Bad depth : "+s);
- }
-
- s=st.nextToken();
- continue;
- }
-
- else if (opt.equals("size"))
- {
- if(s.equals("any")) { size=SIZE_ANY;sizelimit=SIZE_NOLIMIT;}
- else if (s.equals("known"))
- if(ok==false) size=SIZE_UNKNOWN;
- else size=SIZE_KNOWN;
- else if (s.equals("unknown"))
- if(ok==true) size=SIZE_UNKNOWN;
- else size=SIZE_KNOWN;
- else
- try
- {
- sizelimit=Integer.valueOf(s).intValue();
- if(size==SIZE_ANY) size=SIZE_LIMITED;
- }
- catch (NumberFormatException n)
- {
- System.err.println("[CONFIG_ERROR] Bad size : "+s);
- }
-
- s=st.nextToken();
- continue;
- }
-
- else if (opt.equals("strip"))
- {
- if(s.equals("none")) strip=STRIP_NONE;
- else if (s.equals("server")) strip=STRIP_SERVER;
- else if (s.equals("location")) strip=STRIP_LOCATION;
- else if (s.equals("auto")) strip=STRIP_AUTO;
- else if (s.equals("dir")) strip=STRIP_DIRECTORY;
- else
- System.err.println("[CONFIG_ERROR] Invalid parameter to strip option : "+s);
-
- s=st.nextToken();
- continue;
- }
-
- else if (opt.equals("target"))
- {
- if(s.equals("any")) target=TARGET_ANY;
- else if (s.equals("anyserver")) target=TARGET_ANY;
- else if (s.equals("world")) target|=TARGET_WORLD;
- else if (s.equals("known")) target|=TARGET_KNOWN;
- else if (s.equals("server")) target|=TARGET_SERVER;
- else if (s.equals("location")) target|=TARGET_LOCATION;
- else if (s.equals("directory")) target|=TARGET_DIRECTORY;
- else if (s.equals("dir")) target|=TARGET_DIRECTORY;
- else if (s.equals("subdir")) target|=TARGET_SUBDIR;
-
-
- // aliases
- else if (s.equals("loc")) target|=TARGET_LOCATION;
- else if (s.equals("me")) target|=TARGET_ME;
- else if (s.equals("site")) target|=TARGET_SITE;
-
-
- else if (s.equals("auto")) target=0;
- else
- System.err.println("[CONFIG_ERROR] Invalid parameter to target option : "+s);
-
- s=st.nextToken();
- continue;
- }
-
- else if (opt.equals("act"))
- {
- if(s.equals("reject") ||
- s.equals("stop") ||
- (s.equals("load") && ok==false) )
- { q=0.0f;action=ACT_REJECT;}
- else if (s.equals("noparse")) { depth=-1;action=ACT_NOPARSE;}
- else if (s.equals("fastclose")) action=ACT_FASTCLOSE;
- else if (s.equals("close")) action=ACT_CLOSE;
- else if (s.equals("load")) action=ACT_LOAD;
- else if (s.equals("nosave")) action=ACT_NOSAVE;
- else if (s.equals("direct") ||
- s.equals("noproxy") )
- action=ACT_NOPROXY;
-
- else
- System.err.println("[CONFIG_ERROR] Invalid parameter to act option : "+s);
-
- s=st.nextToken();
- continue;
- }
-
- else if (opt.equals("log"))
- {
- if((s.equals("none") ||
- s.equals("off")
- ) && ok==true)
- log=LOG_NONE;
- else if (s.equals("queue")) log|=LOG_QUEUE;
- else if (s.equals("server")
- ||s.equals("serverdefault")
- ) log=LOG_SERVERDEFAULT;
- else if (s.equals("load")) log|=LOG_LOAD;
- else if (s.equals("stored")
- ||s.equals("saved")
- ||s.equals("store")
- ||s.equals("loaded")
- ) log|=LOG_STORED;
- else if (s.equals("parse")) log|=LOG_PARSE;
- else if (s.equals("save")) log|=LOG_SAVE;
- else if (s.equals("err")
- ||s.equals("error")
- ) log|=LOG_ERR;
- else if (s.equals("all")) log|=LOG_ALL;
- else if (s.equals("ioerr")
- || s.equals("io"))
- log|=LOG_IOERR;
- else if (s.equals("fatalerr")
- || s.equals("fatal"))
- log|=LOG_FATALERR;
- else if (s.equals("default")) log=LOG_DEFAULT;
- else if (s.equals("url") ||
- s.equals("urlonly") ||
- s.equals("short")
- )
- if (ok==true) log=(short)((log & LOG_ALL)|LOG_URLONLY);
- else log&=LOG_ALL;
-
- else
- System.err.println("[CONFIG_ERROR] Invalid parameter to log option : "+s);
-
- s=st.nextToken();
- continue;
- }
- else if (opt.equals("upd"))
- {
- if(s.equals("load")) update=UPD_LOAD;
- else if (s.equals("norefresh") ||
- s.equals("none")
- ) update=UPD_NOREFRESH;
- else if (s.equals("reload") ||
- s.equals("force") ||
- s.equals("forceload") ||
- s.equals("forcereload")
- ) update=UPD_RELOAD;
- else if (s.equals("update")) update=UPD_UPDATE;
- else if (s.equals("forceupdate")) update=UPD_FORCEUPDATE;
- else if (s.equals("noreparse")) update=UPD_NOREPARSE;
- else
- try
- {
- updatelimit=Integer.valueOf(s).intValue();
- if(update!=UPD_UPDATE || update!=UPD_FORCEUPDATE )
- update=UPD_UPDATE;
- }
- catch (NumberFormatException n)
- {
- System.err.println("[CONFIG_ERROR] Bad update interval : "+s);
- }
-
- s=st.nextToken();
- continue;
- }
- else
- {
- System.err.println("[CONFIG_ERROR] Unknown mask option "+opt);
- break;
- }
- }
-
- }
- catch (NoSuchElementException ignore)
- {}
-
-
- }
- guessTarget();
- guessStrip();
- }
-
- private final void guessTarget()
- {
- if(target!=0) return;
- if(urlmasks==null) { target=TARGET_ME;return;}
- /* guess: */
- byte ttarget=GUESS_TARGET_ANYWHERE;
-
- for(int i=urlmasks.length-1;i>=0;i--)
- {
- String s;
- s=urlmasks[i].toString();
-
- if(s.indexOf("://")>0)
- {
- if(ttarget<GUESS_TARGET_ANY)
- ttarget=GUESS_TARGET_ANY;
- break;
- }
-
- if(s.startsWith("/"))
- {
- if(ttarget<GUESS_TARGET_SITE)
- ttarget=GUESS_TARGET_SITE;
- continue;
- }
-
- if(s.startsWith("*"))
- {
- if(ttarget<GUESS_TARGET_ANYWHERE)
- ttarget=GUESS_TARGET_ANYWHERE;
- continue;
- }
-
- if(ttarget<GUESS_TARGET_LOCATION)
- ttarget=GUESS_TARGET_LOCATION;
-
- }
- /* and setup..... */
- switch(ttarget)
- {
- case GUESS_TARGET_ANYWHERE:
- case GUESS_TARGET_ANY:
- target=TARGET_ANY;break;
- case GUESS_TARGET_SITE:
- target=TARGET_SITE;break;
- case GUESS_TARGET_LOCATION:
- target=TARGET_ME;break;
- default: throw new IllegalArgumentException ("Screwed in guessTarget();");
- }
-
- }
-
- private final void guessStrip()
- {
- if( (strip==STRIP_LOCATION || strip==STRIP_DIRECTORY)
- &&
- (target & TARGET_ME)==0 )
- { strip=STRIP_SERVER;return;}
- if(strip!=STRIP_AUTO) return;
- if(urlmasks==null) { strip=STRIP_NONE;return;}
- if( (target & TARGET_KNOWN)!=0 ||
- (target & TARGET_WORLD)!=0 ) { strip=STRIP_NONE;return;}
- if( (target & TARGET_SERVER)!=0 ) { strip=STRIP_SERVER;return;}
- if( (target & TARGET_LOCATION)!=0 ) { strip=STRIP_LOCATION;return;}
- strip=STRIP_DIRECTORY;
- }
-
- public final boolean hasContent()
- {
- if(contentmasks==null) return false; else return true;
- }
-
- public final static byte getTarget(String frombase,String fromdir,String locbase,String url)
- {
- // int target=mask.getTarget(frombase,fromdir,url);
- URL u;
- try
- {
- u=new URL(url);
-
- }
- catch (MalformedURLException badurl)
- {
- return TARGET_WORLD;
- }
- String urldir=util.getDirname(u.getFile());
-
- if(locbase.regionMatches(0,url,0,locbase.length()))
- {
- /* stejna location */
- if(fromdir.equals(urldir)) return TARGET_DIRECTORY;
- if(fromdir.regionMatches(0,urldir,0,fromdir.length()))
- return TARGET_SUBDIR;
- return TARGET_LOCATION;
- }
-
- String urlbase=u.getProtocol()+"://"+u.getHost();
- if(frombase.equals(urlbase))
- {
-
- /* stejny server */
- return TARGET_SERVER;
- }
- /* prohledama databazi known locations */
- for(int i=loader.loc.length-1;i>=0;i--)
- if(loader.loc[i].locbase.regionMatches(0,url,0,loader.loc[i].locbase.length()))
- return TARGET_KNOWN;
- return TARGET_WORLD;
- }
-
- public final boolean match(String url,String ext,String src,byte target,String locbase)
- {
- // System.out.println("url="+url+"\n\tsrc="+src+" mask.target="+this.target+" target="+target);
- if( (target & this.target)==0) return false;
- // System.out.println("target ok");
- srcscan:while(true)
- {
- if(srcmasks!=null)
- {
- /* check SRC */
- for(int i=0;i<srcmasks.length;i++)
- // System.out.println(srcok[i]+" "+srcmasks[i]);
- if(srcmasks[i].matches(src) == srcok[i])
- if(anysrc) break srcscan;
- else ;
- else
- if(!anysrc) return false;
-
- if(anysrc) return false;
- }
- break;
- }
- // System.out.println("src ok");
- extscan:while(true)
- {
- if(extmasks!=null)
- {
- /* check EXT */
- for(int i=0;i<extmasks.length;i++)
- if(extmasks[i].matches(ext) == extok[i])
- if(anyext) break extscan;
- else ;
- else
- if(!anyext) return false;
- if(anyext) return false;
- }
- break;
- }
- // System.out.println("ext ok");
- urlscan:while(true)
- {
- if(urlmasks!=null)
- {
- String stripped=url;
- switch(strip)
- {
- case STRIP_NONE:break;
- case STRIP_SERVER:
- int i=url.indexOf("://",0);
- i=url.indexOf("/",i+3);
- stripped=url.substring(i);
- break;
- case STRIP_LOCATION:
- stripped=url.substring(locbase.length());
- break;
- }
-
-
- for(int i=0;i<urlmasks.length;i++)
- if(urlmasks[i].matches(stripped) == urlsok[i])
- if(anyurl) break urlscan;
- else ;
- else
- if(!anyurl) return false;
- if(anyurl) return false;
- }
- break;
- }
- // System.out.println("ALL OK");
- return true;
- }
-
- }
-