home *** CD-ROM | disk | FTP | other *** search
File List | 1989-10-04 | 19.6 KB | 753 lines |
- _EXTENSIBLE HASHING_
- by Steve Heller
-
-
- [LISTING ONE]
-
- {KRAM.PAS - A Keyed Random Access Method for Turbo Pascal 5.0.}
- {Copyright (c) 1989 by Chrysalis Software Corp. All rights reserved.}
- {Written by Steve Heller.}
-
- {$V-}
-
- program KramTest;
-
- uses Crt;
-
- const
- PARAMSIZE = 128; {the size of the parameter block, in bytes}
-
- DATASIZE = 2048; {the size of a data block, in bytes}
-
- INDEXCOUNT = 1024; {the number of index entries. Must be a power of 2.}
- INDEXSIZE = INDEXCOUNT * Sizeof(integer); {the size of the index block}
-
- type
- KramDataType = array[1..DATASIZE] of byte;
- KramDataPtr = ^KramDataType;
- KramIndexType = array[1..INDEXCOUNT] of integer;
- KramIndexPtr = ^KramIndexType;
-
- {The variant record type below is used so that we can add new}
- {parameters if needed, without changing the size of the parameter}
- {block in the data file.}
- KramParamType = record
- case integer of
- 0: (
-
- {the items below are saved from one run to another}
- KeyLength:integer;
- DataLength:integer;
- HighBlock:integer;
- {the items above are saved from one run to another}
-
- {the ones below are valid only during the current run}
- CurrentBlock:integer;
- BlockModified:boolean;
- DataPtr:KramDataPtr;
- IndexPtr:KramIndexPtr;
- );
- 1: (Dummy:array[1..PARAMSIZE] of byte);
- end;
- KramParamPtr = ^KramParamType;
-
- FileRec = RECORD
- Handle : word;
- Mode : word;
- RecSize: word;
- Private: array [1..26] OF byte;
-
- {note: this is a nonstandard declaration}
- UserData: array [1..4] OF pointer;
-
- Name : array [0..79] OF char;
- END;
-
-
-
-
- FUNCTION HashCode(Key : string):longint;
-
- {Use this function to calculate a pseudo-random number based on the}
- {value of its argument. The result is used to determine which index}
- {entry will be used to access the record with the given key. The}
- {algorithm used here is appropriate for ASCII key values, as it uses}
- {the low five bits of each byte of the key.}
-
- VAR
- i : integer;
- result : longint;
- temp1 : longint;
- temp2 : longint;
- bytetemp : byte;
-
- BEGIN
-
- result := 0;
-
- FOR i := 1 TO length(Key) DO
- BEGIN
- temp1 := result shl 5;
- temp2 := result shr 27;
- result := temp1 or temp2;
- bytetemp := ord(Key[i]);
- result := result xor bytetemp;
- END;
-
- HashCode := result;
-
- END;
-
-
-
-
- PROCEDURE SeekBlock(VAR KramFile : file; BlockNum : integer);
-
- {Use this procedure to position the file pointer to a particular data}
- {block. In order to do this, we must skip the parameter block and }
- {the index block. Also note that the first data block is #1.}
-
- VAR
- BlockPosition : longint;
-
- BEGIN
-
- BlockPosition := PARAMSIZE + INDEXSIZE + (DATASIZE * (BlockNum-1));
- Seek(KramFile,BlockPosition);
-
- END;
-
-
-
-
- PROCEDURE KramInit(FileName : string; KeyLength, DataLength: integer);
-
- {Use this procedure to initialize a new KRAM file. It sets up the}
- {key length and the data length according to the input arguments.}
- {The high data block number is set to 1 and data block #1 is}
- {initialized to zeroes (an empty block). The index block is set to}
- {all 1's, so that all accesses will go to the empty data block.}
-
- VAR
- KramFile:file;
- Index : KramIndexType;
- Data : KramDataType;
- Params : KramParamType;
- i : integer;
-
-
- BEGIN
-
- Assign(KramFile,FileName);
- Rewrite(KramFile,1);
-
- FillChar(Params.Dummy,SizeOf(Params.Dummy),0);
-
- Params.KeyLength := KeyLength;
- Params.DataLength := DataLength;
-
- {the highest data block number in use is #1}
- Params.HighBlock := 1;
-
- BlockWrite(KramFile,Params,SizeOf(Params));
-
- {Initialize the index block to all 1's, as only data block #1 exists}
-
- FOR i := 1 TO INDEXCOUNT DO
- Index[i] := 1;
-
- BlockWrite(KramFile,Index,SizeOf(Index));
-
- {Initialize the first data block to all zeroes}
-
- FOR i := 1 TO DATASIZE DO
- Data[i] := 0;
-
- BlockWrite(KramFile,Data,SizeOf(Data));
-
- Close(KramFile);
-
- END;
-
-
-
-
- PROCEDURE KramOpen(VAR KramFile:file;KramFileName:string);
-
- {Use this procedure to open the file. It reads the parameter block}
- {and the index block from the beginning of the file and allocates}
- {space for the data block.}
-
- VAR
- RecsRead : integer;
- ParamPtr : KramParamPtr;
-
- BEGIN
-
- Assign(KramFile,KramFileName);
- Reset(KramFile,1);
-
- New(ParamPtr);
-
- KramParamPtr(FileRec(KramFile).UserData[1]) := ParamPtr;
-
- BlockRead(KramFile,ParamPtr^,SizeOf(KramParamType),RecsRead);
- IF RecsRead <> SizeOf(KramParamType) THEN
- BEGIN
- WriteLn('Invalid KRAM file: ',KramFileName);
- Halt(1);
- END;
-
- New(ParamPtr^.IndexPtr);
- New(ParamPtr^.DataPtr);
- ParamPtr^.CurrentBlock := 0;
-
- BlockRead(KramFile,ParamPtr^.IndexPtr^,
- SizeOf(KramIndexType),RecsRead);
-
- IF RecsRead <> SizeOf(KramIndexType) THEN
- BEGIN
- WriteLn('Invalid KRAM file: ',KramFileName);
- Halt(1);
- END;
-
- END;
-
-
-
-
- PROCEDURE KramClose(var KramFile:file);
-
- {Use this procedure to close the file after use. It also deallocates}
- {the dynamic storage used for the parameter, index, and data blocks.}
-
- {IMPORTANT NOTE: If you have modified the file, by adding records,}
- {for example, you MUST close the file to ensure that all the records}
- {have been written to the file.}
-
- VAR
- ParamPtr : KramParamPtr;
-
- BEGIN
-
- ParamPtr := KramParamPtr(FileRec(KramFile).UserData[1]);
- SeekBlock(KramFile,ParamPtr^.CurrentBlock);
- BlockWrite(KramFile,ParamPtr^.DataPtr^,DATASIZE);
-
- Dispose(ParamPtr^.DataPtr);
- Dispose(ParamPtr^.IndexPtr);
- Dispose(ParamPtr);
-
- Close(KramFile);
-
- END;
-
-
-
- FUNCTION KramAdd(VAR KramFile : file; KeyValue : string;
- DataValue : string):boolean;
-
- {Use this procedure to add records to a KRAM file that has been opened}
- {by KramOpen. You must supply the file pointer that was returned by}
- {KramOpen in "KramFile",the key in "KeyValue", and the data in}
- {"DataValue". The algorithm is known as "extensible hashing".}
-
- VAR
- ParamPtr : KramParamPtr;
- LowDataPtr : KramDataPtr;
- HighDataPtr : KramDataPtr;
- HashVal : longint;
- BlockNumber : integer;
- TempBlockNumber : integer;
- RecordLength : integer;
- i,j,k : integer;
- SlotFound: boolean;
- FoundFirst : boolean;
- FoundLast : boolean;
- FirstBlock : integer;
- LastBlock : integer;
- MidBlock : integer;
- KramFileName : string;
- TempKeyValue : string;
- OldOffset : integer;
- LowOffset : integer;
- HighOffset : integer;
- Duplicate : boolean;
- KeepLooking : boolean;
-
- BEGIN
-
- ParamPtr := KramParamPtr(FileRec(KramFile).UserData[1]);
-
- KramFileName := FileRec(KramFile).Name;
-
- RecordLength := ParamPtr^.KeyLength + ParamPtr^.DataLength;
-
- REPEAT
-
- HashVal := HashCode(KeyValue) and (INDEXCOUNT - 1);
-
- BlockNumber := ParamPtr^.IndexPtr^[HashVal+1];
-
- IF ParamPtr^.CurrentBlock <> BlockNumber THEN
- BEGIN
- IF (ParamPtr^.BlockModified)
- and (ParamPtr^.CurrentBlock <> 0) THEN
- BEGIN
- ParamPtr^.BlockModified := FALSE;
- SeekBlock(KramFile,ParamPtr^.CurrentBlock);
- BlockWrite(KramFile,ParamPtr^.DataPtr^,DATASIZE);
- END;
- SeekBlock(KramFile,BlockNumber);
- BlockRead(KramFile,ParamPtr^.DataPtr^,DATASIZE);
- ParamPtr^.CurrentBlock := BlockNumber;
- END;
-
-
- Duplicate := FALSE;
- KeepLooking := TRUE;
- SlotFound := FALSE;
- i := 1;
- OldOffset := 1;
-
- {initialize length of temporary string used for comparison}
- TempKeyValue[0] := char(ParamPtr^.KeyLength);
-
- WHILE KeepLooking AND (i <= DATASIZE DIV RecordLength) DO
- BEGIN
- IF ParamPtr^.DataPtr^[OldOffset] = 0 THEN
- BEGIN
- KeepLooking := FALSE;
- SlotFound := TRUE;
- END
- ELSE
- BEGIN
- Move(ParamPtr^.DataPtr^[OldOffset],TempKeyValue[1],ParamPtr^.KeyLength);
- IF TempKeyValue = KeyValue THEN
- BEGIN
- Duplicate := TRUE;
- KeepLooking := FALSE;
- END
- ELSE
- BEGIN
- OldOffset := OldOffset + RecordLength;
- i := i + 1;
- END;
- END;
- END;
-
- IF SlotFound THEN
- BEGIN
- Move(KeyValue[1],ParamPtr^.DataPtr^[OldOffset],
- ParamPtr^.KeyLength);
-
- Move(DataValue[1],
- ParamPtr^.DataPtr^[OldOffset+ParamPtr^.KeyLength],
- ParamPtr^.DataLength);
-
- ParamPtr^.BlockModified := TRUE;
- END
- ELSE IF Duplicate = FALSE THEN
- BEGIN
- {First we must determine how many index entries are affected}
- {by the split}
- FoundFirst := FALSE;
- FoundLast := FALSE;
- LastBlock := INDEXCOUNT;
- FOR i := 1 TO INDEXCOUNT DO
- BEGIN
- IF (ParamPtr^.IndexPtr^[i] = BlockNumber)
- and (not FoundFirst) THEN
- BEGIN
- FirstBlock := i;
- FoundFirst := TRUE;
- END;
- IF (ParamPtr^.IndexPtr^[i] <> BlockNumber)
- and FoundFirst and (not FoundLast) THEN
- BEGIN
- LastBlock := i - 1;
- FoundLast := TRUE;
- END
- END;
- IF FirstBlock >= LastBlock THEN
- {we are out of room}
- BEGIN
- WriteLn('KRAM file: ',KramFileName,' is full.');
- Halt(1);
- END;
-
- {Now we have to allocate another block for the split.}
- ParamPtr^.HighBlock := ParamPtr^.HighBlock + 1;
- MidBlock := (FirstBlock + LastBlock - 1) DIV 2;
-
- {We will assign the items that have the higher hash code}
- {to the new block}
- FOR i := MidBlock+1 TO LastBlock DO
- ParamPtr^.IndexPtr^[i] := ParamPtr^.HighBlock;
-
- {Now we have to go through all the items in the block to be split}
- {and assign them to the old block or the new one according to their}
- {new hash codes, 1 bit longer than the previous ones. An extra}
- {temporary block makes this easier.}
- New(LowDataPtr);
- New(HighDataPtr);
-
- FillChar(LowDataPtr^,SizeOf(LowDataPtr^),0);
- FillChar(HighDataPtr^,SizeOf(HighDataPtr^),0);
-
- OldOffset := 1;
- LowOffset := 1;
- HighOffset := 1;
- FOR i := 1 TO DATASIZE DIV RecordLength DO
- BEGIN
- Move(ParamPtr^.DataPtr^[OldOffset],TempKeyValue[1],
- ParamPtr^.KeyLength);
-
- TempKeyValue[0] := char(ParamPtr^.KeyLength);
- HashVal := HashCode(TempKeyValue) and (INDEXCOUNT - 1);
- TempBlockNumber := ParamPtr^.IndexPtr^[HashVal+1];
- IF TempBlockNumber = BlockNumber THEN
- {send to the lower one}
- BEGIN
- Move(ParamPtr^.DataPtr^[OldOffset],
- LowDataPtr^[LowOffset],RecordLength);
-
- LowOffset := LowOffset + RecordLength;
- END
- ELSE
- BEGIN
- Move(ParamPtr^.DataPtr^[OldOffset],
- HighDataPtr^[HighOffset],RecordLength);
-
- HighOffset := HighOffset + RecordLength;
- END;
- OldOffset := OldOffset + RecordLength;
- END;
-
- SeekBlock(KramFile,BlockNumber);
- BlockWrite(KramFile,LowDataPtr^,DATASIZE);
-
- SeekBlock(KramFile,ParamPtr^.HighBlock);
- BlockWrite(KramFile,HighDataPtr^,DATASIZE);
-
- Dispose(LowDataPtr);
- Dispose(HighDataPtr);
-
- {Make sure the same block isn't used the the next time we have to}
- {expand the file. Also note that the data block is out of date.}
- ParamPtr^.CurrentBlock := 0;
- Seek(KramFile,0);
- BlockWrite(KramFile,ParamPtr^,SizeOf(KramParamType));
- BlockWrite(KramFile,ParamPtr^.IndexPtr^,
- SizeOf(KramIndexType));
-
- END;
-
- UNTIL SlotFound or Duplicate;
-
- IF Duplicate THEN
- KramAdd := FALSE
- ELSE
- KramAdd := TRUE;
-
- END;
-
-
-
-
- FUNCTION KramRead(var KramFile:file; KeyValue: string;
- var DataValue: string):boolean;
-
- {Use this function to read records from a KRAM file that has been}
- {opened by KramOpen. You must supply the key in "KeyValue" and the}
- {file pointer that was returned by KramOpen in "KramFile". The data}
- {stored under that key will be returned in "DataValue". The return}
- {value is TRUE if the record was found, and FALSE if it wasn't.}
-
- VAR
- ParamPtr : KramParamPtr;
- HashVal : longint;
- BlockNumber : integer;
- RecordLength : integer;
- i,j,k : integer;
- SlotFound: boolean;
- KeepLooking : boolean;
- KramFileName : string;
- TempKeyValue : string;
- DataOffset : integer;
-
- BEGIN
-
- ParamPtr := KramParamPtr(FileRec(KramFile).UserData[1]);
-
- KramFileName := FileRec(KramFile).Name;
-
- RecordLength := ParamPtr^.KeyLength + ParamPtr^.DataLength;
-
- HashVal := HashCode(KeyValue) and (INDEXCOUNT - 1);
-
- BlockNumber := ParamPtr^.IndexPtr^[HashVal+1];
-
- IF ParamPtr^.CurrentBlock <> BlockNumber THEN
- BEGIN
- IF (ParamPtr^.BlockModified)
- and (ParamPtr^.CurrentBlock <> 0) THEN
- BEGIN
- ParamPtr^.BlockModified := FALSE;
- SeekBlock(KramFile,ParamPtr^.CurrentBlock);
- BlockWrite(KramFile,ParamPtr^.DataPtr^,DATASIZE);
- END;
- SeekBlock(KramFile,BlockNumber);
- BlockRead(KramFile,ParamPtr^.DataPtr^,DATASIZE);
- ParamPtr^.CurrentBlock := BlockNumber;
- END;
-
-
- KeepLooking := TRUE;
- SlotFound := FALSE;
- i := 1;
- DataOffset := 1;
-
- {initialize length of temporary string used for comparison}
- TempKeyValue[0] := char(ParamPtr^.KeyLength);
-
- WHILE KeepLooking AND (i <= DATASIZE DIV RecordLength) DO
- BEGIN
- IF ParamPtr^.DataPtr^[DataOffset] = 0 THEN
- KeepLooking := FALSE
- ELSE
- BEGIN
- Move(ParamPtr^.DataPtr^[DataOffset],TempKeyValue[1],ParamPtr^.KeyLength);
- IF TempKeyValue = KeyValue THEN
- BEGIN
- SlotFound := TRUE;
- KeepLooking := FALSE;
- END
- ELSE
- BEGIN
- DataOffset := DataOffset + RecordLength;
- i := i + 1;
- END;
- END;
- END;
-
-
- IF SlotFound THEN
- BEGIN
- Move(ParamPtr^.DataPtr^[DataOffset+ParamPtr^.KeyLength],
- DataValue[1],ParamPtr^.DataLength);
-
- DataValue[0] := char(ParamPtr^.DataLength);
- KramRead := TRUE;
- END
- ELSE
- KramRead:= FALSE;
-
- END;
-
-
-
-
- PROCEDURE KramInfo(var KramFile:file; var KeyLength: integer;
- var DataLength: integer);
-
- {Use this procedure to get the key and data lengths from a KRAM file that has}
- {been opened by KramOpen. You must supply the file pointer that was returned}
- {by KramOpen in "KramFile".}
-
- VAR
- ParamPtr : KramParamPtr;
-
- BEGIN
-
- ParamPtr := KramParamPtr(FileRec(KramFile).UserData[1]);
-
- KeyLength := ParamPtr^.KeyLength;
-
- DataLength := ParamPtr^.DataLength;
-
- END;
-
-
-
-
- VAR
- KramTestFile : file;
- FileName : string;
- DataFileName : string;
- KeyLength : integer;
- DataLength : integer;
- KramFile : file;
- Ok : boolean;
- InputFile : text;
- Key : string;
- RecNum : integer;
- TestRecNum : integer;
- RecNumStr : string;
- InputFileBuf : array [1..10240] of char;
- Create : string;
- Status : integer;
- ReadStatus : boolean;
- AddStatus : boolean;
- Temp : string;
- Data : string;
- TempKey : string;
- TempData : string;
-
- BEGIN
-
- ClrScr;
- WriteLn('KRAM.PAS - A Keyed Random Access Method for Turbo Pascal 5.0.');
- WriteLn('Copyright (c) 1989 by Chrysalis Software Corp. All rights reserved.');
- WriteLn('Written by Steve Heller.');
- WriteLn;
- WriteLn('This program is shareware: if you find it useful, you should');
- WriteLn('register it by sending a check in the amount of $39.95, ');
- WriteLn('payable to Chrysalis Software Corporation, to the following address');
- WriteLn;
- WriteLn('Chrysalis Software Corporation');
- WriteLn('P. O. Box 0335');
- WriteLn('Baldwin, NY 11510');
- WriteLn;
- WriteLn('Registered users will receive an updated version of the program,');
- WriteLn('incorporating all of the optimizations and extensions mentioned');
- WriteLn('in this article.');
- WriteLn;
- Write('Please hit ENTER to continue.');
- ReadLn(FileName);
- ClrScr;
-
- Write('Please enter the name of the data file to be used for input: ');
- ReadLn(DataFileName);
-
- Write('Please enter the name of the KRAM file: ');
- ReadLn(FileName);
-
- Write('Create the KRAM file (Y/N): ');
- ReadLn(Create);
- IF (Create[1] = 'Y') or (Create[1] = 'y') THEN
- BEGIN
-
- Write('Please enter the key length: ');
- ReadLn(KeyLength);
- Write('Please enter the data length: ');
- ReadLn(DataLength);
-
- Assign(InputFile,DataFileName);
- Reset(InputFile);
- SetTextBuf(InputFile,InputFileBuf);
-
- KramInit(FileName,KeyLength,DataLength);
- KramOpen(KramTestFile,FileName);
-
- RecNum := 0;
- AddStatus := TRUE;
- WHILE NOT EOF(InputFile) DO
- BEGIN
- IF RecNum Mod 10 = 0 THEN
- Write(RecNum:5);
- RecNum := RecNum + 1;
- ReadLn(InputFile,Temp);
- Key := Copy(Temp,1,KeyLength);
- Data := Copy(Temp,KeyLength+1,length(Temp));
- AddStatus := KramAdd(KramTestFile,Key,Data);
- IF AddStatus = FALSE THEN
- BEGIN
- WriteLn;
- WriteLn('Unable to add key: ',Key);
- END;
- END;
-
- WriteLn;
- Close(InputFile);
- KramClose(KramTestFile);
- END;
-
- Assign(InputFile,DataFileName);
- Reset(InputFile);
- SetTextBuf(InputFile,InputFileBuf);
-
- KramOpen(KramTestFile,FileName);
-
- KramInfo(KramTestFile,KeyLength,DataLength);
-
- RecNum := 0;
- WHILE NOT EOF(InputFile) DO
- BEGIN
- IF RecNum Mod 10 = 0 THEN
- Write(RecNum:5);
- RecNum := RecNum + 1;
- ReadLn(InputFile,Temp);
-
- Key := copy(Temp,1,KeyLength);
- TempData := copy(Temp,KeyLength+1,DataLength);
-
- ReadStatus := KramRead(KramTestFile,Key,Data);
- IF Not ReadStatus THEN
- BEGIN
- WriteLn;
- WriteLn('Key ',TempKey,' not found.');
- END;
- IF TempData <> Data THEN
- BEGIN
- WriteLn;
- WriteLn('Record number ',RecNum,' does not match');
- END;
- END;
-
- WriteLn;
-
- Close(InputFile);
- KramClose(KramTestFile);
-
- END.
-
- [LISTING TWO]
-
- {KRAMDATA.PAS - generates data for KRAM testing}
- {890226:1245}
-
- VAR
- s : String[255];
- t : Text;
- n : LongInt;
- i : LongInt;
- j : Integer;
- KeyLength : Integer;
- DataLength : Integer;
- FName : String[80];
-
- BEGIN
-
- Randomize;
-
- WriteLn;
-
- Write('Name of data file to be generated: ');
- ReadLn(FName);
-
- Write('Number of items to generate: ');
- ReadLn(n);
-
- Write('Length of Keys: ');
- ReadLn(KeyLength);
-
- Write('Length of Data: ');
- ReadLn(DataLength);
-
- Assign(t,Fname);
- Rewrite(t);
-
- FOR i := 1 TO N DO
- BEGIN
- IF i = 1000*int(i/1000) THEN Write(i:10);
- s := '';
- FOR j := 1 TO KeyLength DO
- s := s + chr(random(26)+65);
- Write(t,s);
- WriteLn(t,i:DataLength);
- END;
-
- Close(t);
-
- END.