home *** CD-ROM | disk | FTP | other *** search
- #include <stdio.h>
- #include <ctype.h>
- #include "util.h"
- #include "combine.h"
- /*
- * pass2: Determine anchor points in the files.
- *
- * This routine identifies lines which occur precisely once in atleast two
- * files and no more than once in the third file. All such lines are
- * anchor points for passes 3 and 4.
- *
- * This routine scans the symbol table. For each record which meets the
- * above criteria, links are made in the record arrays to associate
- * anchor records with each other.
- *
- * Return value:
- * This procedure has no return value.
- */
-
- void pass2 () {
-
- register int good_files; /* Number of files that a record is unique in. */
-
- register int hash_code; /* Index into symbol table */
-
- register int i; /* Misc. variable */
-
- int indexes[MAX_FILE_COUNT];/* Index into each record array */
-
- /*
- * Clear the indexes for all non-existant files.
- */
-
- for (i = file_count; i < MAX_FILE_COUNT; ++i) {
- indexes[i] = 0;
- }
-
- /*
- * Set up a pseudo line at the front and end of each file as an
- * anchor point.
- */
-
- for (i = 0; i < MATCH_COUNT; ++i) {
- if (files[curr_file[i]].record != 0 &&
- files[corres_file[i]].record != 0) {
-
- files[curr_file[i]].record[BEGIN_INDEX].
- value[value_sub[i]] = BEGIN_INDEX;
- files[curr_file[i]].
- record[files[curr_file[i]].record_array_size-1].
- value[value_sub[i]] =
- files[corres_file[i]].record_array_size - 1;
-
- }
- }
-
- /*
- * Test each entry in the symbol table.
- */
-
- for (hash_code = 1; hash_code < sym_tab_size; ++hash_code) {
-
- /*
- * Quickly see if the hash code is used at all
- */
- if ( sym_tab_cache_ptr[hash_code] == CACHE_FREE_ENTRY ){
- continue;
- }
-
-
- /*
- * Ensure the record occurs at most once in all files.
- *
- * This code counts the number of files a unique record is found in.
- * If the record does not exist precisely once in atleast two files or
- * if the record is not unique in any file, then the record cannot be
- * an anchor record.
- */
-
- good_files = 0; /* Assume the record exists in no files */
- for (i = 0; i < file_count; ++i) {
- indexes[i] = files[i].sym_tab_index[hash_code];
- /* if record is not unique in this file */
- if (indexes[i] < 0) {
- good_files = 0;
- break;
- /* if record is unique in this file */
- } else if (indexes[i] > 0) {
- good_files++;
- }
- }
-
- if (good_files < 2) {/* Record not unique in enough files */
- continue;
- }
-
- /*
- * Link up anchors between any two files.
- *
- * If the current file and the corresponding file both contain
- * the same unique line. Link the current file to the
- * corresponding file.
- */
-
- for (i = 0; i < MATCH_COUNT; ++i) {
-
- if (indexes[curr_file[i]] > 0 &&
- indexes[corres_file[i]] > 0) {
-
- files[curr_file[i]].
- record[indexes[curr_file[i]]].
- value[value_sub[i]] =
- indexes[corres_file[i]];
-
- }
-
- }
-
- }
-
- }
-