home *** CD-ROM | disk | FTP | other *** search
File List | 1990-03-09 | 15.0 KB | 479 lines |
- C PROGRAMMING COLUMN
- by Al Stevens
-
-
- [LISTING ONE]
-
-
- /* ---------------------- csort.h --------------------------- */
-
- #define NOFLDS 5 /* maximum number of fields to sort */
- #define MOSTMEM 50000U /* most memory for sort buffer */
- #define LEASTMEM 10240 /* least memory for sort buffer */
-
- struct s_prm { /* sort parameters */
- int rc_len; /* record length */
- struct {
- int f_pos; /* 1st position of field (rel 1) */
- int f_len; /* length of field */
- char ad; /* a = ascending; d = descending */
- } s_fld [NOFLDS]; /* one per field */
- };
-
- struct bp { /* one for each sequence in merge buffer */
- char *rc; /* -> record in merge buffer */
- int rbuf; /* records left in buffer this sequence */
- int rdsk; /* records left on disk this sequence */
- };
-
- int init_sort(struct s_prm *prms); /* Initialize the sort */
- void sort(char *rcd); /* Pass records to Sort */
- char *sort_op(void); /* Retrieve sorted records*/
- void sort_stats(void); /* Display sort statistics*/
-
-
-
- [LISTING TWO]
-
- /* --------------------- filesort.c ------------------------ */
-
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include "csort.h"
-
- /* sort a file:
- * command line: filesort filename length {f1, l1, az1, ...}
- * filename is the name of the input file
- * length is the length of a record
- * for each field:
- * f1 is the field position relative to 1
- * l1 is the field lengths
- * az1 = A = ascending, D = descending
- */
-
- static struct s_prm sp;
- static void usage(void);
-
- void main(int argc, char *argv[])
- {
- int i, fct = 0;
- FILE *fpin, *fpout;
- char *buf;
- char filename[64];
-
- /* ------- get the file name from the command line ------ */
- if (argc-- > 1)
- strcpy(filename, argv++[1]);
- else
- usage();
- /* ----- get the record length from the command line ---- */
- if (argc-- > 1)
- sp.rc_len = atoi(argv++[1]);
- else
- usage();
- /* ----- get field definitions from the command line ---- */
- do {
- if (argc < 4)
- usage();
- sp.s_fld[fct].f_pos = atoi(argv++[1]);
- sp.s_fld[fct].f_len = atoi(argv++[1]);
- sp.s_fld[fct].ad = *argv++[1];
- argc -= 3;
- fct++;
- } while (argc > 1);
-
- printf("\nFile: %s, length", filename, sp.rc_len);
- for (i = 0; i < fct; i++)
- printf("\nField %d: position %d, length %d, %s",
- i+1,
- sp.s_fld[i].f_pos,
- sp.s_fld[i].f_len,
- sp.s_fld[i].ad == 'd' ?
- "descending" : "ascending");
-
- if ((fpin = fopen(filename, "rb")) == NULL) {
- printf("\nInput file not found");
- exit(1);
- }
- if ((buf = malloc(sp.rc_len)) == NULL ||
- init_sort(&sp) == -1) {
- printf("\nInsufficient memory to sort");
- exit(1);
- }
- /* ------ sort the input records ------- */
- while (fread(buf, sp.rc_len, 1, fpin) == 1)
- sort(buf);
- sort(NULL);
- fclose(fpin);
- /* ----- retrieve the sorted output records ------ */
- fpout = fopen("SORTED.DAT", "wb");
- while ((buf = sort_op()) != NULL)
- fwrite(buf, sp.rc_len, 1, fpout);
- fclose(fpout);
- sort_stats();
- free(buf);
- }
-
- static void usage(void)
- {
- printf("\nusage: filesort fname len {pos length ad...}");
- exit(1);
- }
-
-
- [LISTING THREE]
-
- /* ----------------------- csort.c ------------------------- */
-
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include "csort.h"
-
- static struct s_prm *sp; /* structure of sort parameters */
- static unsigned totrcd; /* total records sorted */
- static int no_seq; /* counts sequences */
- static int no_seq1;
- static unsigned bspace; /* available buffer space */
- static int nrcds; /* # of records in sort buffer */
- static int nrcds1;
- static char *bf, *bf1; /* points to sort buffer */
- static int inbf; /* variable records in sort buffer */
- static char **sptr; /* -> array of buffer pointers */
- static char *init_sptr; /* pointer to appropriated buffer */
- static int rcds_seq; /* rcds / sequence in merge buffer */
- static FILE *fp1, *fp2; /* sort work file fds */
- static char fdname[15]; /* sort work name */
- static char f2name[15]; /* sort work name */
-
- static int comp(char **a, char **b);
- static char *appr_mem(unsigned *h);
- static FILE *wopen(char *name, int n);
- static void dumpbuff(void);
- static void merge(void);
- static void prep_merge(void);
-
- /* -------- initialize sort global variables---------- */
- int init_sort(struct s_prm *prms)
- {
- sp = prms;
- if ((bf = appr_mem(&bspace)) != NULL) {
- nrcds1 = nrcds = bspace / (sp->rc_len + sizeof(char *));
- init_sptr = bf;
- sptr = (char **) bf;
- bf += nrcds * sizeof(char *);
- fp1 = fp2 = NULL;
- totrcd = no_seq = inbf = 0;
- return 0;
- }
- else
- return -1;
- }
-
- /* --------- Function to accept records to sort ------------ */
- void sort(char *s_rcd)
- {
- if (inbf == nrcds) { /* if the sort buffer is full */
- qsort(init_sptr, inbf,
- sizeof (char *), comp);
- if (s_rcd) { /* if there are more records to sort */
- dumpbuff(); /* dump the buffer to a sort work file*/
- no_seq++; /* count the sorted sequences */
- }
- }
- if (s_rcd != NULL) {
- /* --- this is a record to sort --- */
- totrcd++;
- /* --- put the rcd addr in the pointer array --- */
- *sptr = bf + inbf * sp->rc_len;
- inbf++;
- /* --- move the rcd to the buffer --- */
- memmove(*sptr, s_rcd, sp->rc_len);
- sptr++; /* point to next array entry */
- }
- else { /* null pointer means no more rcds */
- if (inbf) { /* any records in the buffer? */
- qsort(init_sptr, inbf,
- sizeof (char *), comp);
- if (no_seq) /* if this isn't the only sequence*/
- dumpbuff(); /* dump the buffer to a work file */
- no_seq++; /* count the sequence */
- }
- no_seq1 = no_seq;
- if (no_seq > 1) /* if there is more than 1 sequence */
- prep_merge(); /* prepare for the merge */
- }
- }
-
- /* -------------- Prepare for the merge ----------------- */
- static void prep_merge()
- {
- int i;
- struct bp *rr;
- unsigned n_bfsz;
-
- memset(init_sptr, '\0', bspace);
- /* -------- merge buffer size ------ */
- n_bfsz = bspace - no_seq * sizeof(struct bp);
- /* ------ # rcds/seq in merge buffer ------- */
- rcds_seq = n_bfsz / no_seq / sp->rc_len;
- if (rcds_seq < 2) {
- /* ---- more sequence blocks than will fit in buffer,
- merge down ---- */
- fp2 = wopen(f2name, 2); /* open a sort work file */
- while (rcds_seq < 2) {
- FILE *hd;
- merge(); /* binary merge */
- hd = fp1; /* swap fds */
- fp1 = fp2;
- fp2 = hd;
- nrcds *= 2;
- /* ------ adjust number of sequences ------ */
- no_seq = (no_seq + 1) / 2;
- n_bfsz = bspace - no_seq * sizeof(struct bp);
- rcds_seq = n_bfsz / no_seq / sp->rc_len;
- }
- }
- bf1 = init_sptr;
- rr = (struct bp *) init_sptr;
- bf1 += no_seq * sizeof(struct bp);
- bf = bf1;
-
- /* fill the merge buffer with records from all sequences */
-
- for (i = 0; i < no_seq; i++) {
- fseek(fp1, (long) i * ((long) nrcds * sp->rc_len),
- SEEK_SET);
- /* ------ read them all at once ------ */
- fread(bf1, rcds_seq * sp->rc_len, 1, fp1);
- rr->rc = bf1;
- /* --- the last seq has fewer rcds than the rest --- */
- if (i == no_seq-1) {
- if (totrcd % nrcds > rcds_seq) {
- rr->rbuf = rcds_seq;
- rr->rdsk = (totrcd % nrcds) - rcds_seq;
- }
- else {
- rr->rbuf = totrcd % nrcds;
- rr->rdsk = 0;
- }
- }
- else {
- rr->rbuf = rcds_seq;
- rr->rdsk = nrcds - rcds_seq;
- }
- rr++;
- bf1 += rcds_seq * sp->rc_len;
- }
- }
-
- /* ------- Merge the work file down
- This is a binary merge of records from sequences
- in fp1 into fp2. ------ */
- static void merge()
- {
- int i;
- int needy, needx; /* true = need a rcd from (x/y) */
- int xcnt, ycnt; /* # rcds left each sequence */
- int x, y; /* sequence counters */
- long adx, ady; /* sequence record disk addresses */
-
- /* --- the two sets of sequences are x and y ----- */
- fseek(fp2, 0L, SEEK_SET);
- for (i = 0; i < no_seq; i += 2) {
- x = y = i;
- y++;
- ycnt =
- y == no_seq ? 0 : y == no_seq - 1 ?
- totrcd % nrcds : nrcds;
- xcnt = y == no_seq ? totrcd % nrcds : nrcds;
- adx = (long) x * (long) nrcds * sp->rc_len;
- ady = adx + (long) nrcds * sp ->rc_len;
- needy = needx = 1;
- while (xcnt || ycnt) {
- if (needx && xcnt) { /* need a rcd from x? */
- fseek(fp1, adx, SEEK_SET);
- adx += (long) sp->rc_len;
- fread(init_sptr, sp->rc_len, 1, fp1);
- needx = 0;
- }
- if (needy && ycnt) { /* need a rcd from y? */
- fseek(fp1, ady, SEEK_SET);
- ady += sp->rc_len;
- fread(init_sptr+sp->rc_len, sp->rc_len, 1, fp1);
- needy = 0;
- }
- if (xcnt || ycnt) { /* if anything is left */
- /* ---- compare the two sequences --- */
- if (!ycnt || (xcnt &&
- (comp(&init_sptr, &init_sptr + sp->rc_len))
- < 0)) {
- /* ----- record from x is lower ---- */
- fwrite(init_sptr, sp->rc_len, 1, fp2);
- --xcnt;
- needx = 1;
- }
- else if (ycnt) { /* record from y is lower */
- fwrite(init_sptr+sp->rc_len,
- sp->rc_len, 1, fp2);
- --ycnt;
- needy = 1;
- }
- }
- }
- }
- }
-
- /* -------- Dump the sort buffer to the work file ---------- */
- static void dumpbuff()
- {
- int i;
-
- if (fp1 == NULL)
- fp1 = wopen(fdname, 1);
- sptr = (char **) init_sptr;
- for (i = 0; i < inbf; i++) {
- fwrite(*(sptr + i), sp->rc_len, 1, fp1);
- *(sptr + i) = 0;
- }
- inbf = 0;
- }
-
- /* --------------- Open a sort work file ------------------- */
- static FILE *wopen(char *name, int n)
- {
- FILE *fp;
- strcpy(name, "sortwork.000");
- name[strlen(name) - 1] += n;
- if ((fp = fopen(name, "wb+")) == NULL) {
- printf("\nFile error");
- exit(1);
- }
- return fp;
- }
-
- /* --------- Function to get sorted records ----------------
- This is called to get sorted records after the sort is done.
- It returns pointers to each sorted record.
- Each call to it returns one record.
- When there are no more records, it returns NULL. ------ */
-
- char *sort_op()
- {
- int j = 0;
- int nrd, i, k, l;
- struct bp *rr;
- static int r1 = 0;
- char *rtn;
- long ad, tr;
-
- sptr = (char **) init_sptr;
- if (no_seq < 2) {
- /* -- with only 1 sequence, no merge has been done -- */
- if (r1 == totrcd) {
- free(init_sptr);
- fp1 = fp2 = NULL;
- r1 = 0;
- return NULL;
- }
- return *(sptr + r1++);
- }
- rr = (struct bp *) init_sptr;
- for (i = 0; i < no_seq; i++)
- j |= (rr + i)->rbuf | (rr + i)->rdsk;
-
- /* -- j will be true if any sequence still has records - */
- if (!j) {
- fclose(fp1); /* none left */
- remove(fdname);
- if (fp2) {
- fclose(fp2);
- remove(f2name);
- }
- free(init_sptr);
- fp1 = fp2 = NULL;
- r1 = 0;
- return NULL;
- }
- k = 0;
-
- /* --- find the sequence in the merge buffer
- with the lowest record --- */
- for (i = 0; i < no_seq; i++)
- k = ((comp( &(rr + k)->rc, &(rr + i)->rc) < 0) ? k : i);
-
- /* --- k is an integer sequence number that offsets to the
- sequence with the lowest record ---- */
-
- (rr + k)->rbuf--; /* decrement the rcd counter */
- rtn = (rr + k)->rc; /* set the return pointer */
- (rr + k)->rc += sp->rc_len;
- if ((rr + k)->rbuf == 0) {
- /* ---- the sequence got empty ---- */
- /* --- so get some more if there are any --- */
- rtn = bf + k * rcds_seq * sp->rc_len;
- memmove(rtn, (rr + k)->rc - sp->rc_len, sp->rc_len);
- (rr + k)->rc = rtn + sp->rc_len;
- if ((rr + k)->rdsk != 0) {
- l = ((rcds_seq-1) < (rr+k)->rdsk) ?
- rcds_seq-1 : (rr+k)->rdsk;
- nrd = k == no_seq - 1 ? totrcd % nrcds : nrcds;
- tr = (long) ((k * nrcds + (nrd - (rr + k)->rdsk)));
- ad = tr * sp->rc_len;
- fseek(fp1, ad, SEEK_SET);
- fread(rtn + sp->rc_len, l * sp->rc_len, 1, fp1);
- (rr + k)->rbuf = l;
- (rr + k)->rdsk -= l;
- }
- else
- memset((rr + k)->rc, 127, sp->rc_len);
- }
- return rtn;
- }
-
- /* ------- Function to display sort stats -------- */
- void sort_stats()
- {
- printf("\n\n\nRecord Length = %d",sp->rc_len);
- printf("\n%d records sorted",totrcd);
- printf("\n%d sequence",no_seq1);
- if (no_seq1 != 1)
- putchar('s');
- printf("\n%u characters of sort buffer", bspace);
- printf("\n%d records per buffer\n\n",nrcds1);
- }
-
- /* ----- appropriate available memory ----- */
- static char *appr_mem(unsigned *h)
- {
- char *buff = NULL;
-
- *h = (unsigned) MOSTMEM + 1024;
- while (buff == NULL && *h > LEASTMEM) {
- *h -= 1024;
- buff = malloc(*h);
- }
- return buff;
- }
-
- /* ------- compare function for sorting, merging -------- */
- static int comp(char **a, char **b)
- {
- int i, k;
-
- if (**a == 127 || **b == 127)
- return (int) **a - (int) **b;
- for (i = 0; i < NOFLDS; i++) {
- if (sp->s_fld[i].f_pos == 0)
- break;
- if ((k = strnicmp((*a)+sp->s_fld[i].f_pos - 1,
- (*b)+sp->s_fld[i].f_pos - 1,
- sp->rc_len)) != 0)
- return (sp->s_fld[i].ad == 'd')?-k:k;
- }
- return 0;
- }
-
-