home *** CD-ROM | disk | FTP | other *** search
/*--------------------------------------------------------------------------
  MAXSORT -- Program that sorts a text file, regardless of its size, using
             a classic Shell sort.  The input is read in runs: a run ends
             when the sort buffer (BUFFER_SIZE) is nearly full or when
             MAX_NUM_LINES lines have been read.  Each run is sorted into
             its own temporary file; when the input had to be split, the
             temporary files are merged into a single sorted result.

  Usage: maxsort [arg1] [arg2]

         arg1 = file to be sorted
         arg2 = file that receives the sorted output

         If arg2 is not supplied, the sorted output goes to stdout.

         If no arguments are given, the program reads its input from
         stdin and writes its output to stdout.

  ---------------------------------------------------------------------------*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <malloc.h>
-
/* definitions */

/* Smaller of two values.  Every use of an argument and the expansion as a
   whole are parenthesised, so operands that contain lower-precedence
   operators (|, &, ?: ...) group the way the caller wrote them.  Each
   argument is still evaluated twice: do not pass expressions with side
   effects. */
#define min(a, b) (((a) < (b)) ? (a) : (b))
#define BOOL int
#define BUFFER_SIZE 50000       /* bytes allocated for one sort run        */
#define CTRL_Z 26               /* input character that ends reading       */
#define EOS '\0'                /* string terminator                       */
#define FILE_NAME_LEN 20        /* size of the file-name arrays            */
#define MAX_STR_LEN 255         /* size of one line slot / line buffer     */
#define MAX_NUM_LINES 1000      /* number of entries in lines[]            */
#define MERGE_ORDER 3           /* most files merged in one merge pass     */
#define TEMP_FILE_END ".TMP"    /* suffix of every temporary file name     */
#define TEMP_FILE_START "S"     /* prefix of every temporary file name     */

/* Globals */
FILE *infile;                   /* input stream; main() points it at stdin
                                   or at the file named by argv[1]         */
int lines[MAX_NUM_LINES];       /* offset into the sort buffer at which
                                   each line of the current run starts     */
FILE *merge_files[MERGE_ORDER]; /* inputs of the merge pass in progress    */
int next_buf_avail = 0;         /* next unused index in the sort buffer    */
int next_line = 0;              /* number of lines in the current run      */
int next_merge_file = 0;        /* number given to the next temporary file */
int sort_number = 0;            /* not referenced by the code in this file */

/* Forward declarations.  They are deliberately left without parameter
   lists so that they stay compatible with both old-style and
   prototype-style definitions of these functions. */
int compare_strings();
void copy_to_destination();
BOOL get_sort_buffer();
void make_file_name();
void merge();
void open_merge_input();
FILE * open_out_file();
void resort_merge_buffer();
void remove_files();
void sort_buffer();
void write_lines();
-
- main(argc, argv)
- int argc;
- char *argv[];
- {
- char *buffer;
- BOOL continue_reading;
- int limit;
- int merge_index;
- FILE *outfile;
- char output_file_name[FILE_NAME_LEN];
-
- infile = stdin;
- output_file_name[0] = EOS;
-
- if ((argc > 1) && ((infile = fopen(argv[1], "r")) == (FILE *) NULL)) {
- fprintf(stderr, "Unable to open %s for input.", argv[1]);
- exit(1);
- }
-
- if (argc > 2)
- strcpy(output_file_name, argv[2]);
-
- buffer = malloc(BUFFER_SIZE);
-
- do {
- continue_reading = get_sort_buffer(buffer);
- outfile = open_out_file(next_merge_file++);
- sort_buffer(buffer, lines, next_line);
- write_lines(outfile, buffer);
- if (fclose(outfile) != NULL) {
- fprintf(stderr, "Unable to close merge file.");
- exit(1);
- }
- fprintf(stderr, "Sort run completed. \n");
- } while (continue_reading);
-
- if (fclose(infile) != NULL) {
- fprintf(stderr, "Unable to close input file.");
- exit(1);
- }
-
- free(buffer);
-
- for (merge_index = 0; (merge_index < (next_merge_file -1));
- (merge_index += MERGE_ORDER)) {
- limit = min(merge_index + MERGE_ORDER, next_merge_file);
- open_merge_input(merge_index, limit);
- outfile = open_out_file(next_merge_file++);
- merge(merge_files, outfile, limit - merge_index);
- if (fclose(outfile) != NULL) {
- fprintf(stderr, "Unable to close merge file.");
- exit(1);
- }
- remove_files(merge_files, merge_index, limit);
- fprintf(stderr, "Merge run completed.\n");
- }
-
- if (output_file_name[0] != EOS ) {
- char tmp_file_name[FILE_NAME_LEN];
-
- make_file_name(--next_merge_file, tmp_file_name);
- unlink(output_file_name);
- if (rename(tmp_file_name, output_file_name) != NULL) {
- fprintf(stderr, "Unable to use output file.");
- exit(1);
- }
- }
- else
- copy_to_destination(--next_merge_file);
-
- }
-
/* COMPARE_STRINGS
 *
 * Ordering function called by sort_buffer() and resort_merge_buffer().
 * Both strings are handed to strcmp() and its result is returned
 * unchanged: negative, zero or positive when string1 sorts before, equal
 * to, or after string2.
 */
int compare_strings(string1, string2)
char *string1, *string2;
{
    int order = strcmp(string1, string2);

    return order;
}
-
-
- /* COPY_TO_DESTINATION */
-
- void copy_to_destination(filenum)
- int filenum;
- {
- FILE *fileptr;
- char filename[FILE_NAME_LEN];
- char line[MAX_STR_LEN];
- int value;
-
- make_file_name(filenum, filename);
- if ((fileptr = fopen(filename, "r")) == (FILE *) NULL) {
- fprintf(stderr, "Error in reopening final merge file.");
- exit(1);
- }
-
- while (fgets(line, MAX_STR_LEN, fileptr) != (char *) NULL)
- fputs(line, stdout);
- if (fclose(stdout) != NULL) {
- fprintf(stderr, "Unable to close output file.");
- exit(1);
- }
-
- if (fclose(fileptr) != NULL) {
- fprintf(stderr, "Unable to close merge file.");
- exit(1);
- }
- unlink(filename);
- }
-
-
- /* GET_SORT_BUFFER */
-
- BOOL get_sort_buffer(buffer)
- char *buffer;
- {
- int curchar;
-
- next_buf_avail = 0; /* initialize globals */
- next_line = 0;
- lines[0] = 0;
-
- while (((curchar = getc(infile)) != EOF) && (curchar != CTRL_Z)) {
- buffer[next_buf_avail++] = curchar;
-
- if (curchar == '\n') {
- buffer[next_buf_avail -1] = EOS;
- lines[++next_line] = next_buf_avail;
-
- if ((next_line >= MAX_NUM_LINES) ||
- (next_buf_avail > (BUFFER_SIZE - MAX_STR_LEN)))
- break;
- }
- }
-
- buffer[next_buf_avail -1] = EOS;
-
- return(curchar != EOF);
- }
-
- /* MAKE_FILE_NAME */
-
- void make_file_name(filenum, destination)
- int filenum;
- char destination[];
- {
- char numbuffer[10];
-
- destination[0] = EOS;
- strcat(destination, TEMP_FILE_START);
- sprintf(numbuffer, "%d", filenum);
- if (strlen(numbuffer) > (8 - strlen(TEMP_FILE_START))) {
- fprintf(stderr, "Too many merge files. ");
- exit(1);
- }
- strcat(destination, numbuffer);
- strcat(destination, TEMP_FILE_END);
- }
-
-
- /* MERGE */
-
- void merge(merge_files, outfile, numfiles)
- FILE *merge_files[], *outfile;
- int numfiles;
- {
- char merge_buf[MERGE_ORDER * MAX_STR_LEN];
- int merge_lines[MERGE_ORDER], i, j;
-
- for (i = 0, j = 0; (i < numfiles); i++, j += MAX_STR_LEN) {
- merge_lines[i] = j;
- fgets(&merge_buf[j], MAX_STR_LEN, merge_files[i]);
- }
-
- sort_buffer(merge_buf, merge_lines, numfiles);
-
- while (numfiles > 0) {
- fputs(&merge_buf[merge_lines[0]], outfile);
-
- if (fgets(&merge_buf[merge_lines[0]], MAX_STR_LEN,
- merge_files[merge_lines[0] / MAX_STR_LEN])
- == (char *) NULL) {
- merge_lines[0] = merge_lines[--numfiles];
- }
- resort_merge_buffer(merge_buf, merge_lines, numfiles);
- }
- }
-
- /* OPEN_MERGE_INPUT */
-
- void open_merge_input(start, stop)
- int start, stop;
- {
- int i;
- int range = stop - start;
- char file_name[FILE_NAME_LEN];
-
- if (range > MERGE_ORDER) {
- fprintf(stderr, "Internal error.");
- exit(1);
- }
- for (i = 0; (i < range); i++) {
- make_file_name(start + i, file_name);
- if ((merge_files[i] = fopen(file_name, "r")) == (FILE *) NULL) {
- fprintf(stderr, "Error in reopening merge file.");
- exit(1);
- }
- }
- }
-
-
- /* OPEN_OUT_FILE */
-
- FILE *open_out_file(filenum)
- int filenum;
- {
- FILE *fileptr;
- char file_name[FILE_NAME_LEN];
-
- make_file_name(filenum, file_name);
- if ((fileptr = fopen(file_name, "w")) == (FILE *) NULL) {
- fprintf(stderr, "Error in opening merge file.");
- exit(1);
- }
- return(fileptr);
- }
-
/* RESORT_MERGE_BUFFER
 *
 * Moves the entry at merge_lines[0] down the array until it no longer
 * sorts after the entry it is compared with.
 *
 * The walk starts with position 0 and candidate position 1.  At every
 * step the candidate is replaced by its right-hand neighbour when that
 * neighbour exists and sorts strictly lower; the walk stops when the
 * moving entry sorts no higher than the candidate, otherwise the two are
 * exchanged, the moving entry's new position becomes the current one and
 * the next candidate is twice that position.
 *
 * merge_buf    buffer holding the line text
 * merge_lines  offsets into merge_buf; reordered in place
 * numfiles     number of valid entries in merge_lines
 */
void resort_merge_buffer(merge_buf, merge_lines, numfiles)
char merge_buf[];
int merge_lines[];
int numfiles;
{
    int current = 0;
    int candidate = 1;
    int held;

    for (;;) {
        if (candidate >= numfiles)
            return;

        /* Prefer the right-hand neighbour when it sorts strictly lower. */
        if (((candidate + 1) < numfiles) &&
            (compare_strings(&merge_buf[merge_lines[candidate]],
                             &merge_buf[merge_lines[candidate + 1]]) > 0))
            candidate++;

        if (compare_strings(&merge_buf[merge_lines[current]],
                            &merge_buf[merge_lines[candidate]]) <= 0)
            return;

        held = merge_lines[candidate];
        merge_lines[candidate] = merge_lines[current];
        merge_lines[current] = held;

        current = candidate;
        candidate = current + current;
    }
}
-
- /* REMOVE_FILES */
-
- void remove_files(merge_files, start, stop)
- FILE *merge_files[];
- int start, stop;
- {
- register int i;
- register int range = stop - start;
- char file_name[FILE_NAME_LEN];
-
- if (range > MERGE_ORDER) {
- fprintf(stderr, "internal error.");
- exit(1);
- }
- for (i = 0; (i < range); i++) {
- make_file_name(start + i, file_name);
- if (fclose(merge_files[i]) != NULL) {
- fprintf(stderr, "Unable to close merge file.");
- exit(1);
- }
- unlink(file_name);
- }
- }
-
-
/* SORT_BUFFER
 *
 * Shell sort of the first `last_in_buf` entries of `lineptr`.  Each entry
 * is the offset of a string inside `buffer`; only the offsets are moved,
 * the text itself stays where it is.  Ordering is decided by
 * compare_strings().
 *
 * buffer       buffer holding the strings
 * lineptr      offsets into buffer; reordered in place
 * last_in_buf  number of entries to sort
 */
void sort_buffer(buffer, lineptr, last_in_buf)
char buffer[];
int lineptr[];
int last_in_buf;
{
    int gap, start, lower, upper, held;

    gap = last_in_buf / 2;
    while (gap > 0) {
        for (start = gap; start < last_in_buf; start++) {
            /* Walk back in steps of gap, exchanging out-of-order pairs. */
            lower = start - gap;
            while (lower >= 0) {
                upper = lower + gap;
                if (compare_strings(&buffer[lineptr[lower]],
                                    &buffer[lineptr[upper]]) <= 0)
                    break;
                held = lineptr[upper];
                lineptr[upper] = lineptr[lower];
                lineptr[lower] = held;
                lower -= gap;
            }
        }
        gap /= 2;
    }
}
-
- /* WRITE_LINES */
-
- void write_lines(outfile, buffer)
- FILE *outfile;
- char buffer[];
- {
- int i;
-
- for (i = 0; (i < next_line); i++) {
- fputs(&buffer[lines[i]], outfile);
- putc('\n', outfile);
- }
- }