ftp.freefriends.org

home *** CD-ROM | disk | FTP | other *** search

/ ftp.freefriends.org / ftp.freefriends.org.tar / ftp.freefriends.org / arnold / Source / mush.rstevens.tar.gz / mush.tar / glob.c.orig < prev next >

Wrap

Text File | 1990-05-03 | 20KB | 810 lines

#include "mush.h" #include "glob.h" /* * Buried somewhere in here is the skeleton of a pattern matcher posted * by koblas@mips.COM (David Koblas). It has been hacked almost beyond * recognition to handle more complex patterns, and directory search has * been added (patterns are split at '/' characters when file globbing). */ #ifdef TEST /* Define TEST to build a stand-alone file globbing program */ extern char *malloc(), *realloc(); #define getpath(x,y) (*(y) = 0, (x)) #define Access access #define Strcpy(x,y) (strcpy(x,y), strlen(x)) #define savestr(x) (strcpy(malloc(strlen(x)+1),x)) #ifndef max #define max(x,y) ((x) > (y) ? (x) : (y)) #endif /* max */ #ifndef min #define min(x,y) ((x) > (y) ? (y) : (x)) #endif /* min */ #define xfree free #undef wprint #define wprint printf #define debug 0 #undef sprintf #define TESTGLOB(str1,str2) \ printf("%s %s = %s\n",str1,str2,glob(str1,str2)?"TRUE":"FALSE") main(argc, argv) int argc; char **argv; { char **e; int f; if (argc > 1) while (*++argv) { (void) printf("%s -->\n", *argv); if (f = filexp(*argv, &e)) { columnate(f, e, 0); } } #ifdef TEST2 /* Define TEST2 to automatically run these test cases */ TESTGLOB("abcdefg", "abcdefg"); TESTGLOB("abcdefg", "a?cd?fg"); TESTGLOB("abcdefg", "ab[cde]defg"); TESTGLOB("abcdefg", "ab[a-z]defg"); TESTGLOB("abcdefg", "ab[a-z]defg"); TESTGLOB("ab]defg", "ab[a]c]defg"); TESTGLOB("ab]defg", "ab[a\\]c]defg"); TESTGLOB("abcdefg", "ab*fg"); TESTGLOB("./bc/def/gh/ij", "*de*"); TESTGLOB("./der/den/deq/der/", "*deq*"); TESTGLOB("./bc/def/gh/ij", "*ij"); TESTGLOB("./ij", ".?ij"); TESTGLOB("./bc/def/gh/ij", "./*"); TESTGLOB("abcdef", "*def"); TESTGLOB("abcdef", "*abcdef"); TESTGLOB("abcdef", "abc*"); TESTGLOB("abcdef", "abcdef*"); TESTGLOB("abcdef", "*?*{xxx,,yy}"); TESTGLOB("abcdef", "abcde{f}"); TESTGLOB("abcdef", "abcdef{xxx,,yyy}"); TESTGLOB("abcdef", "abc{def,qwrx}"); TESTGLOB("abcdef", "abc{ab,def,qwrx}"); TESTGLOB("abcdef", "{naqrwer,fuwnwer,as,abc,a}{ab,def,qwrx}"); TESTGLOB("abcdef", "{naqrwer,*,as,abc,a}{ab,def,qwrx}"); TESTGLOB("abcdef", "{{a*,b*},as,a}{ab,def,qwrx}"); TESTGLOB("abcdef", "{{c*,b*},as,a}{ab,def,qwrx}"); TESTGLOB("abcdef", "{{c*,?b*},as,a}{ab,def,qwrx}"); TESTGLOB("abcdef", "{naqrwer,fuwnwer,as,abc,a}{ab,d*f,qwrx}"); #endif /* TEST2 */ } char * any(s1, s2) register char *s1, *s2; { register char *p; if (!s1 || !*s1 || !s2 || !*s2) return 0; for( ; *s1; s1++) { for(p = s2; *p; p++) if (*p == *s1) return s1; } return 0; } #endif /* TEST */ /* * Make a string into a one-element vector */ char ** unitv(s) char *s; { char **v; if (v = (char **)malloc((unsigned)(2 * sizeof(char *)))) { v[0] = savestr(s); v[1] = NULL; } return v; } /* * Append one vector to another */ catv(s1, v1, s2, v2) int s1, s2; char ***v1, **v2; { int i; if (s1 < 0 || !v1) return -1; if (s2 < 0 || !v2) return s1; /* realloc(NULL, size) should be legal, but Sun doesn't support it. */ if (*v1) *v1 = (char **)realloc(*v1,(unsigned)((s1+s2+1) * sizeof(char **))); else *v1 = (char **)malloc((unsigned)((s1+s2+1) * sizeof(char **))); if (*v1) { for (i = 0; i < s2 && v2[i]; i++) (*v1)[s1 + i] = v2[i]; (*v1)[s1 + i] = NULL; xfree(v2); return s1 + i; } return -1; } /* * A duplicate-eliminating comparison for sorting. It treats an empty * string as greater than any other string, and forces empty one of any * pair of of equal strings. Two passes are sufficient to move the empty * strings to the end where they can be deleted by the calling function. * * This is NOT compatible with the ANSI C qsort(), which requires that the * comparison function will not modify its arguments! */ uniqcmp(p1, p2) char **p1, **p2; { int cmp; if (**p1 && !**p2) return -1; if (**p2 && !**p1) return 1; if (cmp = strcmp(*p1, *p2)) return cmp; **p2 = 0; return -1; } /* * Expand a pattern into a list of file names. Returns the number of * matches. As in csh, names generated from pattern sets are returned * even if there are no actual matches. */ filexp(pat, exp) char *pat, ***exp; { char **t1, **t2; int n, new, cnt; if (!exp) return -1; if (!pat || !*pat) return 0; if ((n = sxp(pat, &t1)) > 0) cnt = 0; else return n; *exp = DUBL_NULL; while (n--) if ((new = fxp(t1[n], &t2)) > 0 || new++ == 0 && t2) cnt = catv(cnt, exp, new, t2); if (cnt > 1) { /* Two sort passes to eliminate duplicates -- see uniqcmp() */ qsort((char *)*exp, cnt, sizeof(char *), uniqcmp); qsort((char *)*exp, cnt, sizeof(char *), uniqcmp); while (!(*exp)[cnt - 1][0]) { xfree((*exp)[--cnt]); (*exp)[cnt] = NULL; } } return cnt; } /* * Expand a filename with globbing chars into a list of matching filenames. * Pattern set notatation which crosses directories is not handled, e.g. * "fi{le/exp,nger/h}and" will NOT expand to "file/expand finger/hand". * Such patterns must be pre-expanded by sxp() before calling fxp(). * * The list of expansions is placed in *exp, and the number of matches * is returned, or -1 on an error. */ fxp(name, exp) char *name, ***exp; { char *p; int isdir; if (!exp) return -1; isdir = 1; /* ignore no such file */ p = getpath(name, &isdir); if (isdir < 0) return -1; else if (isdir) return ((*exp = unitv(p)) ? 1 : -1); return pglob(p, 0, exp); } /* * Match all globbings in a path. Mutually recursive with dglob(), below. * The first "skip" characters of the path are not globbed, see dglob(). * * Returns the number of matches, or -1 on an error. *exp is set to the * list of matches. * * If the path has no metachars, it is returned in *exp whether it matches * a real file or not. This allows patterns built by sxp() to be recognized * and returned even when there are no matches (ala csh generation of names * from pattern sets). pglob() still returns zero in this case. */ pglob(path, skip, exp) char *path, ***exp; int skip; { char *t, *t2; int ret = 0; if (!path || !exp || skip < 0) return -1; *exp = DUBL_NULL; /* Must be null in case of zero matches and no sets */ for (t = t2 = path + skip; (t2 = any(t2, META)) && *t2 == '/'; t = t2++) ; if (!t2) { ret = ((*exp = unitv(path)) ? 1 : -1); if (ret > 0 && Access(path, F_OK) < 0) ret = 0; } else { if (t2 = index(t + 1, '/')) *t2++ = 0; if (*t == '/') { *t++ = 0; if (!*path) ret = dglob("/", t, t2, exp); else ret = dglob(path, t, t2, exp); } else { ret = dglob("", t, t2, exp); } } return ret; } /* * Search a directory (possibly recursively) for glob matches. * Argument pat1 is a pattern to be matched in this directory, * and pat2 is a pattern to be matched in matched subdirectories. * * Matches are returned through *exp. */ dglob(dir, pat1, pat2, exp) char *dir, *pat1, *pat2, ***exp; { DIR *dirp; struct dirent *dp; char *b, *d, buf[MAXPATHLEN], **tmp; int n, ret = 0, skip; if (!dir || !exp) return -1; d = (*dir ? dir : "."); if (!(dirp = opendir(d))) return -1; b = buf + Strcpy(buf, dir); if (b > buf && *(b - 1) != '/') *b++ = '/'; skip = b - buf; /* We know this much matches, don't glob it again */ while (ret >= 0 && (dp = readdir(dirp))) { if (fglob(dp->d_name, pat1)) { if (pat2) { (void) sprintf(b, "%s/%s", dp->d_name, pat2); n = pglob(buf, skip, &tmp); ret = catv(ret, exp, n, tmp); } else { (void) strcpy(b, dp->d_name); ret = catv(ret, exp, 1, unitv(buf)); } } } closedir(dirp); return ret; } /* * Match file names. This means that metachars do not match leading ".". */ fglob(str, pat) char *str, *pat; { if (!str || !pat || *str == '.' && *pat != '.') return FALSE; else return glob(str, pat); } /* * Match two concatenated patterns. Mainly for use by sglob(). */ static glob2(str, pat1, pat2) char *str, *pat1, *pat2; { char buf[MAXPATHLEN]; if (!str || !pat1 && !pat2) return FALSE; (void) sprintf(buf, "%s%s", pat1? pat1 : "", pat2? pat2 : ""); return glob(str, buf); } /* * The basic globbing matcher. * * "*" = match 0 or more occurances of anything * "[abc]" = match any of "abc" (ranges supported) * "{xx,yy,...}" = match any of "xx", "yy", ... where * "xx", "yy" can be any pattern or empty * "?" = match any character */ glob(str, pat) char *str, *pat; { int done = FALSE, ret = FALSE; if (!str || !pat) return FALSE; while (*pat && !done && (*str || (*pat == '{' || *pat == '*'))) /*}*/ { /* * First look for a literal match, stepping over backslashes * in the pattern to match against the "protected" character. * Ordering and precendence are important in this expression! */ if (*pat == '\\' && *str == *++pat || *str == *pat) { str++; pat++; } else switch (*pat++) { case '*': /* Match any string */ if (!*pat) { while (*str) str++; break; } /* * Try the rest of the glob against every * possible suffix of the string. A bit * inefficient in cases that eventually fail. */ while (*str && !(ret = glob(str++, pat))) ; return ret; break; case '[': /* Match a set */ repeat: /* If we've hit the end of the set, give up. */ if (!*pat || *pat == ']' || *pat == '\\' && !*++pat) { done = TRUE; break; } /* Check for a range. */ if (*(pat + 1) == '-') { char c = *pat++; /* We don't handle open-ended ranges. */ if (*++pat == ']' || *pat == '\\' && !*++pat) { done = TRUE; break; } if (*str < c || *str > *pat) { pat++; goto repeat; } } else if (*pat != *str) { pat++; goto repeat; } /* * We matched either the range or a literal member of * the set. Skip to the end of the set. */ pat++; while (*pat && *pat != ']') if (*pat++ == '\\' && *pat) pat++; /* * If no pattern remains, the set was never closed, * so don't increment. This will cause a FALSE return. */ if (*pat) { pat++; str++; } break; case '?': /* Match any one character */ str++; break; case '{': /* } Match any of a set of patterns */ return sglob(str, pat - 1, TRPL_NULL); break; default: done = TRUE; } } while (*pat == '*') pat++; return ((*str == '\0') && (*pat == '\0')); } /* * Match a pattern set {s1,s2,...} followed by any other pattern. * Pattern sets and other patterns may nest arbitrarily. * * If "mat" is not a null pointer, a vector of possible expansions * is generated and placed in *mat; otherwise, the expansions are * matched against str and a truth value is returned ("/" is NOT * treated as a directory separator in this case). NOTE: The vector * of expansions may still contain nested pattern sets, which must * be expanded separately. See sxp(). * * Currently allows at most 256 alternatives per set. Enough? :-) */ static sglob(str, pat, mat) char *str, *pat, ***mat; { char *p, *newpat[256], *oldpat[256], buf[MAXPATHLEN], *b = buf; int copy = 1, nest = 0, i = 0, ret = 0; if (!pat) return FALSE; while (*pat) { if (copy) if (*pat != '{') /*}*/ { if (*pat == '\\' && pat[1]) *b++ = *pat++; *b++ = *pat++; continue; } else { copy = 0; pat++; } p = pat; while (*pat && (nest || *pat != ',' && /*{*/ *pat != '}')) { if (*pat == '\\') pat++; else if (*pat == '{') nest++; else if (*pat == '}') nest--; if (*pat) pat++; } if (*pat) { oldpat[i] = pat; newpat[i++] = p; if (*pat != ',') { *pat++ = 0; break; } else *pat++ = 0; } } oldpat[i] = NULL; if (i > 0 && mat) { *mat = (char **)malloc((unsigned)((i + 1) * sizeof(char *))); if (*mat) (*mat)[i] = NULL; else return -1; ret = i; } while (!mat && i-- > 0) if (ret = glob2(str, newpat[i], pat)) break; for (i = 0; oldpat[i]; i++) { if (mat && *mat) { (void) sprintf(b, "%s%s", newpat[i], pat); (*mat)[i] = savestr(buf); } if (oldpat[i + 1]) oldpat[i][0] = ','; else oldpat[i][0] = /*{*/ '}'; } if (ret == 0 && b > buf && mat) { *b = 0; ret = ((*mat = unitv(buf)) ? 1 : -1); } return ret; } /* * Pre-expand pattern set notations so sets containing "/" separators * can be globbed successfully. Returns the number of expansions. */ sxp(pat, exp) char *pat, ***exp; { char **t1 = DUBL_NULL, **t2; int n, new, cnt = 0; if ((n = sglob(NULL, pat, &t1)) < 2) { *exp = t1; return n; } *exp = DUBL_NULL; while (n-- && cnt >= 0) { new = sxp(t1[n], &t2); cnt = catv(cnt, exp, new, t2); } xfree(t1); return cnt; } /* * Generate the "glob difference" of two vectors (*argvp and patv). * The "glob difference" means to remove all strings from argv that * match any of the glob patterns in patv. * * Returns the number of strings remaining in *argvp. The strings "removed" * from argv are actually left at the end of *argvp, so they can still be * accessed; their number will of course be argc - (returned value). */ gdiffv(argc, argvp, patc, patv) int argc, patc; char ***argvp, **patv; { char **argv, *t; int ac, pc, oldac = argc; if (argc < 1 || patc < 1 || !patv || !*patv) return argc; if (!argvp || !(argv = *argvp) || !*argv) return -1; for (ac = 0; ac < argc && argv[ac]; ac++) { for (pc = 0; ac < argc && pc < patc && patv[pc]; pc++) { /* * We shouldn't cross '/' characters, so test * only the "tail" of each element of argv. */ if (!(t = rindex(argv[ac], '/'))) t = argv[ac]; if (glob(t, patv[pc])) { /* Move matches to the end and reduce argc */ t = argv[ac]; argv[ac] = argv[--argc]; argv[argc] = t; /* Start patterns over on the new string */ pc = -1; /* It'll be incremented to 0 */ } } } /* * Sort the two parts of the argv. uniqcmp() works here only if * there already are no duplicates, but we assume that for now. */ if (argc) qsort((char *)argv, argc, sizeof(char *), uniqcmp); if (oldac > argc) qsort((char *)&argv[argc], oldac - argc, sizeof(char *), uniqcmp); return argc; } /* * Generate the longest common prefix from all strings in a vector * If "skip" is nonzero, that many chars are assumed to be in common * and are not tested. WARNING: skip must be <= than the length of * the shortest string in the vector! Safest to call with skip = 0. * * Returns the length of the longest common prefix. */ lcprefix(vec, skip) char **vec; int skip; { char c, **v; int done = FALSE; if (!vec || !*vec || skip < 0) return 0; do { for (v = vec + 1, c = vec[0][skip]; c && *v; v++) if (v[0][skip] != c) { done = TRUE; break; } } while (!done && c && ++skip); return skip; } #define MAXCOLS 8 /* Max number of columns of words to make */ #define MINWIDTH 10 /* Minimum width of each column of words */ #ifdef CURSES #define MAXWIDTH (iscurses? COLS : 80) #else /* CURSES */ #define MAXWIDTH 80 /* Maximum width of all columns */ #endif /* CURSES */ /* * Print a vector in columns * * If "skip" is nonzero, that many chars are assumed to be in common * and are not printed. WARNING: skip must be <= than the length of * the shortest string in the vector! Safest to call with skip = 0. */ columnate(argc, argv, skip) int argc; char **argv; int skip; { int colstep, colwidth[MAXCOLS + 1]; int maxcols = min(argc, MAXCOLS); int minwidth, maxwidth, *widths; int maxword = 0, n, c; if (argc <= 0 || !argv || !*argv) return -1; if (!(widths = (int *)malloc((unsigned)((argc + 1) * sizeof(int))))) return -1; /* * Compute the widths of all words in the vector, and * remember the maximum width and which word had it. * Also remember the minimum width. */ for (minwidth = MAXWIDTH, maxwidth = n = 0; n < argc; n++) { widths[n] = max(strlen(argv[n] + skip) + 2, MINWIDTH); if (widths[n] > MAXWIDTH - MINWIDTH) break; if (widths[n] > maxwidth) { maxwidth = widths[n]; maxword = n; } if (widths[n] < minwidth) minwidth = widths[n]; } for (; maxcols > 0; maxcols--) { if (argc % maxcols) colstep = argc / maxcols + 1; else colstep = argc / maxcols; colwidth[MAXCOLS] = 0; for (c = 0; c < maxcols; c++) { colwidth[c] = 0; for (n = c * colstep; n < (c + 1) * colstep && n < argc; n++) colwidth[c] = max(colwidth[c], widths[n]); colwidth[MAXCOLS] += colwidth[c]; } if (colwidth[MAXCOLS] <= MAXWIDTH) break; } xfree(widths); if (maxcols < 2 && minwidth <= MAXWIDTH / 2) { /* * The maxword fills too much screen, so redo everything * above it, print maxword, then do everything below it. */ if (maxword > 0 && columnate(maxword, argv, skip) < 0) return -1; wprint("%s\n", argv[maxword] + skip); if (argc - maxword < 2) return 0; return columnate(argc - maxword - 1, &argv[maxword + 1], skip); } for (n = 0; n < colstep; n++) { for (c = 0; c < maxcols && n + c * colstep < argc - colstep; c++) wprint("%-*.*s", colwidth[c], colwidth[c], argv[n + c * colstep] + skip); wprint("%s\n", argv[n + c * colstep] + skip); } return 0; } #ifndef DIRECTORY #undef NULL #define NULL 0 /* * 4.2BSD directory access emulation for non-4.2 systems. * Based upon routines in appendix D of Portable C and Unix System * Programming by J. E. Lapin (Rabbit Software). * * No responsibility is taken for any error in accuracies inherent * either to the comments or the code of this program, but if * reported to me then an attempt will be made to fix them. */ /* Support for Berkeley directory reading routines on a V7/SysV file * system. */ /* Open a directory. */ DIR * opendir(name) char *name ; { register DIR *dirp ; register int fd ; if ((fd = open(name, 0)) == -1) return NULL ; if ((dirp = (DIR *) malloc(sizeof(DIR))) == NULL) { close(fd) ; return NULL ; } dirp->dd_fd = fd ; dirp->dd_loc = 0 ; return dirp ; } /* Read an old style directory entry and present it as a new one. */ #define ODIRSIZ 14 struct olddirent { short od_ino ; char od_name[ODIRSIZ] ; } ; /* Get next entry in a directory. */ struct dirent * readdir(dirp) register DIR *dirp ; { register struct olddirent *dp ; static struct dirent dir ; for (;;) { if (dirp->dd_loc == 0) { dirp->dd_size = read(dirp->dd_fd, dirp->dd_buf, DIRBLKSIZ) ; if (dirp->dd_size <= 0) return NULL ; } if (dirp->dd_loc >= dirp->dd_size) { dirp->dd_loc = 0 ; continue ; } dp = (struct olddirent *)(dirp->dd_buf + dirp->dd_loc) ; dirp->dd_loc += sizeof(struct olddirent) ; if (dp->od_ino == 0) continue ; dir.d_fileno = dp->od_ino ; strncpy(dir.d_name, dp->od_name, ODIRSIZ) ; dir.d_name[ODIRSIZ] = '\0' ; /* Ensure termination. */ dir.d_namlen = strlen(dir.d_name) ; dir.d_reclen = DIRSIZ(&dir) ; return(&dir) ; } } /* Close a directory. */ void closedir(dirp) register DIR *dirp ; { close(dirp->dd_fd) ; dirp->dd_fd = -1 ; dirp->dd_loc = 0 ; xfree(dirp) ; } #endif /* DIRECTORY */