home *** CD-ROM | disk | FTP | other *** search
Java Source | 2000-08-18 | 6.0 KB | 177 lines |
- //AdvSiteSearcher c1999 The Gilbert Post by David Faden
- //The applet and code are distributed as linkware...
- //If you use this applet or a variant on its code,
- //include a link to The Gilbert Post,
- //http://www.geocities.com/Athens/Parthenon/1911
- //The Gilbert Post and David Faden take no responsibility
- //for anything bad that happens as a result of using this applet
- //or a derivative based on its code. USE AT YOUR OWN RISK. (big letters)
- //Please send reports of problems to gilbertnews@hotmail.com, anyway, though.
-
- //begin SearchSieve.java
- //The SearchSieve class is probably the most useful of any of the classes of AdvSiteSearcher...
- //It is designed with the idea of char stream in mind...there is a more efficient search method
- //which I'm thinking about
- public class SearchSieve {
- private int num_matches;//how many matches have been found?
- private int kposition;//current position in key
- private char[] key;//search word/phrase
- private boolean exact;//must the word be an exact match or
- //can it be part of a larger word
- private char prevc;//the previous char examined,
- //if it's a word separator char, then the SearchSieve is passed -1
- private int ambiguity_mark;//position of an ambiguous char in the buf
- //amibiguous means the same as the first char of the key
- private char[] buffer;//store chars as one sequence is examined in case
- //the key starts at another spot within the chain
- private int bufferlength;
- private boolean buffering;
- private char[] temp;
-
- //spaces needed if exact match option being used; need to know if there are
- //"spaces" around the "word"
- //renamed defaultspaces to dspaces
- private static final char[] dspaces={' ',
- '\n','\r', ',', '.','!','(',')','?','\t','\'','\"','=','+',':',';','{',
- '}','[',']','\\','|', '<', '>', '/'};
- /*private char[] spaces;
- private int spaceslength;*/
-
- //This will not handle zero length arrays very gracefully
- public SearchSieve(char[] c, boolean bexact) {
- setKey(c,bexact);
- }
-
- public void setKey(char[] c,boolean bexact) {
- exact=bexact;
- //reset();
- key=new char[c.length];
- System.arraycopy(c,0,key,0,c.length);
- buffer=new char[key.length];
- temp=new char[buffer.length];//temp is created here to
- //avoid the overhead of recreating it each time it is needed
- bufferlength=0;
- buffering=false;
- }
-
- //This method needs more thought
- public void reset() {
- kposition=0;
- num_matches=0;
- prevc=(char)-1;//I think this may be equal to (char)0
- //because -1 is all zeros in the lowest two bytes
- bufferlength=0;//clear the buffer
- buffering=false;
- }
-
- public boolean addChar(char c) {
- //if there's a match, return true
- boolean matchfound=false;//left unchanged if nothing found
- boolean eatthebuffer=false;
- if(exact) {
- if(kposition==key.length) {
- //found a match
- if(isSpace(c)) {
- num_matches++;
- matchfound=true;
- }
- kposition=0;
- eatthebuffer=true;
- }
- else if(kposition==0) {
- if(c==key[0] && isSpace(prevc)) kposition++;
- }
- else if(c==key[kposition]) kposition++;
- else {
- //found no match
- kposition=0;
- eatthebuffer=true;
- }
- }
- //Inexact match check
- else {
- if(kposition==0) { if(c==key[0]) kposition++;}
- else if(c==key[kposition]) kposition++;
- else {
- //no match found
- kposition=0;
- eatthebuffer=true;
- }
- if(kposition==key.length) {
- num_matches++;
- kposition=0;
- eatthebuffer=true;
- matchfound=true;
- }
- }
- //a kposition of 1 indicates that one char match has been found
- if(kposition>1 && !buffering && c==key[0]) {
- if(!exact) {
- bufferlength=0;
- buffering=true;
- }//if exact, matches have to be surrounded by spaces
- else if(isSpace(prevc)) {
- bufferlength=0;
- buffering=true;
- }
- }
- if(buffering) {
- if(c==key[bufferlength]){
- buffer[bufferlength]=c;
- bufferlength++;
- }
- else {
- bufferlength=0;
- buffering=false;
- }
- }
- if(eatthebuffer && bufferlength>0) eatTheBuffer();//feed the buffer back into the
- //search system on failures and finds
- prevc=c;
- return matchfound;
- }
-
- //Feed the buffer through addChar(char) as if it were being externally entered.
- //No match can ever be found in the buffer alone, so there is
- //no need to check the return of addChar(char).
- //Huge problem with this method which will cause fatal error...
- //Buffer itself may contain a second ambiguous char...
- //However, if the buffer doesn't match up, it is overwritten
- //When the next call to eatTheBuffer is made
- //Idea, what if temp is made internal to eatTheBuffer?
- //This could lead to a major memory leak, though...
- //Fixed...breath easy now!
- private void eatTheBuffer() {
- //reset search
- kposition=0;
- /*char[] temp=new char[bufferlength];*/
- System.arraycopy(buffer,0,temp,0,bufferlength);
- int templength=bufferlength;//save the value of bufferlength
- bufferlength=0;//reset the buffer
- buffering=false;
- //don't bother checking addChar(char) return value--can never be true
- //in this case
- for(int i=0;i<templength;i++) addChar(temp[i]);
- }
-
- public int numOfMatches() {
- return num_matches;
- }
-
- //I'm not sure how much overhead this will add;
- //changed so as to only be used with default dspaces
- //It might actually pay to use a hash...
- //we can avoid the synchronized methods of Hashtable
- //since everything that we consider to be a space is with the ASCII range...
- //so we can just store the info in an array with entries for every ASCII char...
- //Something to put on the "to-do" list.
- private boolean isSpace(char c) {
- if(c==(char)-1) return true;
- for(int i=0;i<dspaces.length;i++) {
- if(c==dspaces[i]) return true;
- }
- return false;
- }
-
- }//end SearchSieve.java
-