home *** CD-ROM | disk | FTP | other *** search
- #include <stdio.h>
- #include <ctype.h>
- #include "util.h"
- #include "combine.h"
- /*
- * pass3: Expand anchors to non-unique records.
- *
- * If in a pair of files immediately adjacent to an anchor point
- * there are lines which are identical to each other, these lines
- * are then considered to be anchor points. Repeated application of this
- * principle on each possible pair of files and in each possible direction
- * results in all matched lines being found.
- *
- * This routine calls the 'pass3_scan' routine once for each combination of
- * file pairs and directions.
- *
- * Possible design flaw: If a particular anchor exists in all three files,
- * should adjacent records be considered to be an anchor only if all
- * three adjacent records are identical.
- *
- * Return value:
- * This procedure has no return value.
- */
-
- void pass3 () {
-
- int i; /* Misc. variable */
-
- int j; /* Misc. variable */
-
- /*
- * Scan each pair of files in the forward direction.
- */
-
- for (j = 0; j < UNIQUE_MATCH_COUNT; ++j) {
-
- i = unique_match[j];
-
- if (files[curr_file[i]].record != 0 &&
- files[corres_file[i]].record != 0) {
-
- pass3_scan (i, 1);
-
- }
-
- }
-
- /*
- * Scan each pair of files in the reverse direction.
- */
- for (j = 0; j < UNIQUE_MATCH_COUNT; ++j) {
-
- i = unique_match[j];
-
- if (files[curr_file[i]].record != 0 &&
- files[corres_file[i]].record != 0) {
-
- pass3_scan (i, -1);
-
- }
-
- }
-
- }
- /*
- * pass3_scan: Expand anchors to non-unique records.
- *
- * This routine scans the record arrays for a pair of files in one
- * direction expanding anchors. For each record in the first file,
- * if the current record is an anchor, and the next record in each file
- * is a hash code (i.e. not an anchor), and the hash codes are the same,
- * then the next records are considered to be anchors.
- *
- * Return value:
- * This procedure has no return value.
- */
- void pass3_scan (match_no, direction)
- int match_no; /* input */
- /* Which relationship is being scanned */
-
- int direction; /* This is the direction to scan the file.
- Valid values are 1 for forward and -1 for
- backward. */
-
- {
-
- int end_index1; /* Index into record array of file1 of end of
- the record array. (e.g. the first record to
- not process when going in 'direction') */
-
- file_type * file1_ptr; /* First file - current_file */
-
- file_type * file2_ptr; /* Second file - corresponding file */
-
- int file1_sub; /* For each record of the first file, this is a
- subscript of the 'value' array of the
- relationship between file1 and file2 */
-
- int file2_sub; /* For each record of the second file, this is
- a subscript of the 'value' array of the
- relationship between file2 and file1 */
-
- int index1; /* Index into record array of file1 of the
- record currently being processed */
-
- int index2; /* Index into record array of file2 of the
- record anchored to the 'index1' record */
-
- record_type * record1; /* Pointer to record array of file1 */
-
- record_type * record2; /* Pointer to record array of file2 */
-
- int *val1_ptr; /* Pointer to the 'value' field in 'next'
- record on file1. This is the 'value' which
- indicates the relationship to file2. */
-
- int *val2_ptr; /* Pointer to the 'value' field in 'next'
- record on file2. This is the 'value' which
- indicates the relationship to file1. */
-
-
- /*
- * Initialize indexes to first and last record of the files.
- */
- file1_ptr = &files[curr_file[match_no]];
- file2_ptr = &files[corres_file[match_no]];
- file1_sub = value_sub[match_no];
- file2_sub = rev_value_sub[match_no];
-
- record1 = file1_ptr -> record;
- record2 = file2_ptr -> record;
-
- if (direction == 1) {
- index1 = BEGIN_INDEX;
- end_index1 = file1_ptr -> record_array_size - 1;
- } else {
- index1 = file1_ptr -> record_array_size - 1;
- end_index1 = BEGIN_INDEX;
- }
-
- /*
- * For each record in the first file.
- *
- * Note: given the existence of the 'begin' and 'end' record on each
- * end of the record array, and given the way the 'end_index1'
- * was set up above (so as to not process the last record
- * on the opposite end of the file from which the scan started),
- * we can convince ourselves that the tests below won't ever index
- * off the end of the arrays.
- */
- for (; index1 != end_index1; index1 += direction) {
-
- /*
- * If the current record in file1 is an anchor point,
- * Compute the index into file2 of the corresponding record.
- * Compute a pointer to value field of the next record in each file.
- */
-
- index2 = record1[index1].value[file1_sub];
- if (!is_hash_code (index2)) {
-
- val1_ptr = &(record1[index1 + direction].
- value[file1_sub]);
- val2_ptr = &(record2[index2 + direction].
- value[file2_sub]);
-
- /*
- * If the neither of the next records are anchor points and
- * the records are identical,
- * consider these records to be anchors.
- */
- if (is_hash_code (*val1_ptr) &&
- is_hash_code (*val2_ptr) &&
- *val1_ptr == *val2_ptr) {
-
- link_records (match_no, index1 + direction,
- index2 + direction);
-
- }
-
- }
-
- }
-
- }
-