/* home *** CD-ROM | disk | FTP | other *** search  (archive-page navigation text, not part of strsed.c) */
/*
 * RCS identification string, compiled in unless `lint' is defined.
 * The trailing word after #endif is now a comment: ISO C does not allow
 * extra tokens after the directive.
 */
#ifndef lint
static const char *rcsid = "$Header: h:/tmp/tmp/strsed\\RCS\\strsed.c,v 1.17 1990/03/08 20:44:32 terry Exp $";
#endif /* lint */
-
- /*
- * Strsed.c
- *
- * ed(1)/tr(1)-like search, replace, transliterate. See the
- * manpage for details.
- *
- * Usage:
- *
- * strsed(string, pattern, 0);
- * char *string;
- * char *pattern;
- * or
- * strsed(string, pattern, range);
- * char *string;
- * char *pattern;
- * int range[2];
- *
- *
- * Terry Jones
- * terry@distel.pcs.com
- * ...!{pyramid,unido}!pcsbst!distel!terry
- *
- * PCS Computer Systeme GmbH
- * Pfaelzer-Wald-Str 36
- * 8000 Muenchen 90
- * West Germany 49-89-68004288
- *
- * January 8th, 1990.
- *
- */
-
- /*
- * $Log: strsed.c,v $
- * Revision 1.17 90/03/08 20:44:32 terry
- * Final cleanup.
- *
- * Revision 1.16 90/03/07 15:46:35 terry
- * Changed backslash_eliminate to only malloc on
- * REPLACEMENT type. Added ".*" optimisation so that
- * the regex functions are never called.
- *
- * Revision 1.15 90/03/06 22:27:49 terry
- * Removed varargs stuff since the 3rd argument is now
- * compulsory. Cleaned up. A few comments even.
- *
- * Revision 1.14 90/03/06 21:50:28 terry
- * Touched up memory stuff. Added mem_find(). Changed
- * buf_sz and buf_inc to be a reasonable reflection
- * of the length of the input.
- *
- * Revision 1.13 90/03/06 20:22:48 terry
- * Major rearrangements. Added mem(), mem_init(), mem_save(),
- * mem_free() to handle memory in a vastly improved fashion.
- * Calls to malloc are minimised as far as possible.
- *
- * Revision 1.12 90/03/06 13:23:33 terry
- * Made map static.
- *
- * Revision 1.11 90/01/10 15:51:12 terry
- * checked in with -k by terry at 90.01.18.20.03.08.
- *
- * Revision 1.11 90/01/10 15:51:12 terry
- * *** empty log message ***
- *
- * Revision 1.10 90/01/10 12:48:40 terry
- * Fixed handling of perverted character ranges in nextch().
- * a-f-c now means a-c.
- *
- * Revision 1.9 90/01/10 12:03:48 terry
- * Pounded on space allocation, added more_space,
- * remove free() in build_map, tested tiny buffer sizes etc.
- *
- * Revision 1.8 90/01/09 18:15:12 terry
- * added backslash elimination to str.
- * altered backslash_elimination to take one of three types
- * REGEX, NORMAL or REPLACEMENT depending on the
- * elimination desired. Changed interpretation of \
- * followed by a single digit to be that character if the
- * type of elimination is NORMAL. i.e. \4 = ^D.
- *
- * Revision 1.7 90/01/09 17:05:05 terry
- * Frozen version for release to comp.sources.unix
- *
- * Revision 1.6 90/01/09 16:47:54 terry
- * Altered pure searching return values to be -1
- *
- * Revision 1.5 90/01/09 14:54:34 terry
- * *** empty log message ***
- *
- * Revision 1.4 90/01/09 14:51:04 terry
- * removed #include <stdio> silliness.
- *
- * Revision 1.2 90/01/09 10:48:22 terry
- * Fixed handling of } and - metacharacters inside
- * transliteration request strings in backslash_eliminate().
- *
- * Revision 1.1 90/01/08 17:41:35 terry
- * Initial revision
- *
- *
- */
-
- #include <ctype.h>
- #include <string.h>
- #ifdef __GNUC__
- #include <stdlib.h>
- #else
- #include <malloc.h>
- #endif
- #include "regex.h"
-
/* Number of bits in a char; (1 << BYTEWIDTH) sizes the per-character tables. */
#define BYTEWIDTH 8

/*
 * The `type' argument of backslash_eliminate(): how escaped characters
 * in the string being processed are to be interpreted.
 */
enum {
    REGEX = 0,          /* the search pattern (a regular expression) */
    REPLACEMENT = 1,    /* the replacement text */
    NORMAL = 2          /* the subject string handed to strsed() */
};
-
/*
 * Indices into the mem_slots table.  Each buffer the code below needs
 * has a fixed slot of its own; mem() hands the buffers out and
 * mem_free() releases the claims, so nothing else has to call malloc
 * or free for them or keep track of what may be freed.
 * Wonderful in theory...
 */
enum {
    MEM_STR = 0,        /* copy of the subject string */
    MEM_PAT = 1,        /* copy of the pattern */
    MEM_FROM = 2,       /* search part of the pattern */
    MEM_TO = 3,         /* replacement part of the pattern */
    MEM_NEWSTR = 4,     /* the string being built */
    MEM_MAP = 5,        /* working copy of a transliteration request */
    MEM_MAP_SAVE = 6,   /* remembered transliteration request */

    MEM_SLOTS = 7       /* number of slots in the table */
};
-
/*
 * RETURN(n) -- leave strsed() with the value n.
 *
 * Calls mem_free(n), which marks every memory slot as unused and makes
 * the slot table forget the buffer whose address is n (so the buffer
 * handed back to the caller is never reused), then returns n as a
 * char *.  Use RETURN(0) on failure.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as one
 * statement, and n is cast before the call so a literal 0 reaches
 * mem_free() as a pointer even when no prototype is visible.
 * n is evaluated twice; pass only side-effect-free expressions.
 */

#define RETURN(n) \
    do { \
        mem_free((char *)(n)); \
        return (char *)(n); \
    } while (0)
-
/*
 * The slot table used by mem(), mem_find(), mem_save(), mem_init() and
 * mem_free().  One entry per MEM_* index.
 */
static struct mem_slot {
    char *s;    /* buffer held by this slot, or 0 when it holds none */
    int size;   /* size in bytes recorded for s */
    int used;   /* set by mem() when the slot is handed out, cleared by mem_free() */
} mem_slots[MEM_SLOTS];
-
-
/*
 * more_space(need) -- reserve `need' more bytes in new_str.
 *
 * Works on strsed()'s locals new_str, space, buf_sz and buf_inc.
 * `space' is the number of bytes still free; space == -1 means the
 * result is being built in place and never grows, so nothing is done.
 * When the request does not fit, the buffer is grown so that buf_inc
 * bytes remain free after the request, and the MEM_NEWSTR slot is
 * updated to describe the new block.  On allocation failure the
 * enclosing function returns 0 through RETURN().
 *
 * One byte beyond buf_sz is always allocated (as the initial allocation
 * in strsed() does) so that the terminating NUL still fits when `space'
 * drops to exactly zero.
 *
 * `need' is evaluated more than once; pass side-effect-free expressions.
 */
#define more_space(need) \
    do { \
        if ((need) && space != -1) { \
            if (space - (need) < 0) { \
                char *more_space_grown; \
                buf_sz += buf_inc + (need) - space; \
                more_space_grown = (char *)realloc(new_str, (unsigned)buf_sz + 1); \
                if (!more_space_grown) { \
                    RETURN(0); \
                } \
                new_str = more_space_grown; \
                mem_slots[MEM_NEWSTR].s = new_str; \
                mem_slots[MEM_NEWSTR].size = buf_sz + 1; \
                space = buf_inc; \
            } \
            else { \
                space -= (need); \
            } \
        } \
    } while (0)
-
-
- char *
- strsed(string, pattern, range)
- register char *string;
- register char *pattern;
- int *range;
- {
- extern char *re_compile_pattern();
- extern int re_search();
-
- static char *backslash_eliminate();
- static char *mem();
- static void mem_init();
- static void mem_free();
-
- char *from;
- char *new_str;
- char *pat;
- char *str;
- char *tmp;
- char *to;
- static char map[1 << BYTEWIDTH];
- int buf_sz;
- int buf_inc;
- int global = 0;
- int match;
- int new_pos = 0;
- int search_only = 0;
- int seenbs = 0;
- int space;
- int match_all = 0;
- register int str_len;
- static int first_time = 1;
- static struct re_pattern_buffer re_comp_buf;
- struct re_registers regs;
-
- if (!string || !pattern){
- RETURN(0);
- }
-
- /*
- * If this is the first time we've been called, clear the memory slots.
- */
- if (first_time){
- mem_init();
- }
-
- /*
- * Take our own copies of the string and pattern since we promised
- * in the man page not to hurt the originals.
- */
- str = mem(MEM_STR, strlen(string) + 1);
- str[0] = '\0';
- strcat(str, string);
- pat = mem(MEM_PAT, strlen(pattern) + 1);
- pat[0] = '\0';
- strcat(pat, pattern);
-
- /*
- * If escape sequences are not already removed elsewhere, remove
- * them from the string. If you don't know what you're doing here
- * or are in any doubt, don't define ESCAPED_STRING.
- */
- #ifndef ESCAPED_STRING
- if (!(str = backslash_eliminate(str, NORMAL, MEM_STR))){
- RETURN(0);
- }
- #endif
-
- str_len = strlen(str);
-
- /*
- * Set up the size of our buffer (in which we build the
- * newstring, and the size by which we increment it when
- * (and if) the need arises. There shouldn't be too much
- * growth in the average case. Of course some people will
- * go and do things like
- *
- * strsed(string, "s/.*$/\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0")
- *
- * and they will be somewhat penalised. Oh well.
- *
- */
-
- buf_sz = str_len < 8 ? 16 : str_len << 1;
- buf_inc = buf_sz;
-
- /*
- * Get the action.
- * s = substitue and g = global.
- * anything else is invalid.
- *
- */
- while (*pat && *pat != '/'){
- switch (*pat){
- case 'g':{
- global = 1;
- break;
- }
- case 's':{
- break;
- }
- default:{
- RETURN(0);
- }
- }
- pat++;
- }
-
- if (!*pat){
- RETURN(0);
- }
-
- pat++;
-
- /*
- * Now split 'pat' into its two components. These are delimited (or
- * should be) by (unquoted) '/'. The first we point to with 'from'
- * and the second with 'to'.
- *
- * Someone should write a function to make this sort of thing trivial...
- *
- */
-
- from = to = pat;
-
- while (*to){
- if (seenbs){
- seenbs = 0;
- }
- else{
- if (*to == '\\'){
- seenbs = 1;
- }
- else if (*to == '/'){
- break;
- }
- }
- to++;
- }
-
- if (!*to){
- RETURN(0);
- }
-
- *to++ = '\0';
-
- if (*to){
- tmp = to + strlen(to) - 1;
-
- /*
- * Back up to the last non-whitespace char in 'to'
- *
- */
-
- while (*tmp == ' ' || *tmp == '\t'){
- tmp--;
- }
-
- /*
- * Make sure that, if there was a character,
- * that it was a / and wasn't preceded by \.
- *
- */
-
- if (*tmp && !(*tmp = '/' && *(tmp - 1) != '\\')){
- RETURN(0);
- }
-
- *tmp = '\0';
- }
- else{
- /*
- * Search only.
- * It doesn't make sense to say
- *
- * strsed(string, "g/abc/", range)
- *
- * because we are only searching and returning the
- * matched indexes. So turn off global (in case it's on)
- * so that we will return just the first instance.
- *
- * If no range has been given either, then there's no
- * point in going on.
- *
- */
-
- if (!range){
- RETURN(0);
- }
-
- global = 0;
- search_only = 1;
- }
-
- /*
- * Eliminate backslashes and character ranges etc.
- *
- */
-
- if (!(from = backslash_eliminate(from, REGEX, MEM_FROM)) ||
- !(to = backslash_eliminate(to, REPLACEMENT, MEM_TO))){
- RETURN(0);
- }
-
- /*
- * If the first char of 'to' is '\0' then we are deleting or
- * searching only. We don't have to worry about space since
- * the transformed string will be less than or equal in length
- * to the original. We just overwrite.
- * We set space = -1 so that later on we can avoid worrying
- * about overflow etc.
- *
- * Otherwise, we are doing a substitution. Here we have to
- * worry about space because the replacement may be larger
- * than the original. malloc some room and if we overflow it
- * later we will realloc. slows things down if the new string
- * turns out to be too much bigger. oh well.
- *
- */
-
- if (*to){
- if (!(new_str = mem(MEM_NEWSTR, buf_sz + 1))){
- RETURN(0);
- }
- space = buf_sz;
- }
- else{
- new_str = str;
- space = -1;
- }
-
- /*
- * Do things to get ready for the regex functions.
- * Don't do anything though if the regex in 'from' is ".*"
- * We handle that below. (Just a special case optimisation).
- *
- */
-
- if (from[0] == '.' && from[1] == '*' && from[2] == '\0'){
- register int i;
- match_all = 1;
- /*
- * For safety's sake, clear out the register values.
- * There might be a register reference in the replacement.
- * There will be nothing in the register (since the search
- * pattern was ".*"). Since we aren't calling the regex
- * stuff we can't rely on it to set these to -1.
- */
- for (i = 0; i < RE_NREGS; i++){
- regs.start[i] = -1;
- }
- }
- else{
- if (first_time){
- if (!(re_comp_buf.buffer = (char *)malloc((unsigned)200))){
- RETURN(0);
- }
-
- re_comp_buf.allocated = 200;
-
- if (!(re_comp_buf.fastmap = (char *)malloc((unsigned)1 << BYTEWIDTH))){
- RETURN(0);
- }
-
- first_time = 0;
- }
-
- re_comp_buf.translate = 0;
- re_comp_buf.used = 0;
-
- /*
- * Compile the r.e.
- *
- */
- if (re_compile_pattern(from, strlen(from), &re_comp_buf)){
- RETURN(0);
- }
- }
-
-
- /*
- * Now get on with the matching/replacing etc.
- *
- */
-
- do {
- if (match_all){
- /* Fake a match instead of calling re_search(). */
- match = 1;
- regs.start[0] = 0;
- regs.end[0] = str_len;
- }
- else{
- match = re_search(&re_comp_buf, str, str_len, 0, str_len, ®s);
- }
-
- if (search_only){
- /*
- * Show what happened and return.
- *
- */
-
- range[0] = match == -1 ? -1 : regs.start[0];
- range[1] = match == -1 ? -1 : regs.end[0];
- RETURN(str);
- }
-
- if (match != -1){
-
- /*
- * Copy that portion that was not matched. It will
- * be unchanged in the output string.
- *
- */
- more_space(regs.start[0]);
- strncpy(new_str + new_pos, str, regs.start[0]);
- new_pos += regs.start[0];
-
- /*
- * Put in the replacement text (if any).
- * We substitute the contents of 'to', watching for register
- * references.
- */
-
- tmp = to;
- while (*tmp){
- if (*tmp == '\\' && isdigit(*(tmp + 1))){
-
- /* A register reference. */
-
- register int reg = *(tmp + 1) - '0';
- int translit = 0;
- int need = regs.end[reg] - regs.start[reg];
-
- /*
- * Check for a transliteration request.
- *
- */
- if (*(tmp + 2) == '{'){
- /* A transliteration table. Build the map. */
- static char *build_map();
- if (!(tmp = build_map(tmp + 2, map))){
- RETURN(0);
- }
- translit = 1;
- }
- else{
- tmp += 2;
- translit = 0;
- }
-
- more_space(need);
-
- /*
- * Copy in the register contents (if it matched), transliterating if need be.
- *
- */
- if (regs.start[reg] != -1){
- register int i;
- for (i = regs.start[reg]; i < regs.end[reg]; i++){
- new_str[new_pos++] = translit ? map[str[i]] : str[i];
- }
- }
- }
- else{
- /* A plain character, put it in. */
- more_space(1);
- new_str[new_pos++] = *tmp++;
- }
- }
-
- /*
- * Move forward over the matched text.
- *
- */
- str += regs.end[0];
- str_len -= regs.end[0];
- }
- } while (global && match != -1 && *str);
-
- /*
- * Copy the final portion of the string. This is the section that
- * was not matched (and hence which remains unchanged) by the last
- * match. Then we head off home.
- *
- */
- more_space(str_len);
- (void) strcpy(new_str + new_pos, str);
- RETURN(new_str);
- }
-
/*
 * DIGIT(x) -- numeric value of the digit character x: '0'..'9' give
 * 0..9, letters give 10 upwards ('a' and 'A' are both 10).  The value
 * handed to the <ctype.h> tests is converted to unsigned char first,
 * as those functions require.  x is evaluated more than once.
 */
#define DIGIT(x) \
    (isdigit((unsigned char)(x)) ? (x) - '0' : \
     islower((unsigned char)(x)) ? (x) + 10 - 'a' : (x) + 10 - 'A')
-
- static char *
- backslash_eliminate(str, type, who)
- char *str;
- int type;
- int who;
- {
- /*
- * Remove backslashes from the strings. Turn \040 etc. into a single
- * character (we allow eight bit values). Currently NUL is not
- * allowed.
- *
- * Turn "\n" and "\t" into '\n' and '\t' characters. Etc.
- *
- * The string may grow slightly here. Under normal circumstances
- * it will stay the same length or get shorter. It is only in the
- * case where we have to turn {a-z}{A-Z} into \0{a-z}{A-Z} that
- * we add two chars. This only happens when we are doing a REPLACEMENT.
- * So we can't overwrite str, and we have to
- * malloc. Sad, but the only ways I could find around it (at this
- * late stage) were really gross. I allowed an extra
- * 100 bytes which should cover most idiotic behaviour.
- * I count the extra space and exit nicely if they do do something
- * extremely silly.
- *
- * 'i' is an index into new_str.
- *
- * 'type' tells us how to interpret escaped characters.
- *
- * type = REGEX
- * if the pattern is a regular expression. If it is then
- * we leave escaped things alone (except for \n and \t and
- * friends).
- *
- * type = REPLACEMENT
- * if this is a replacement pattern. In this case we change
- * \( and \) to ( and ), but leave \1 etc alone as they are
- * register references. - becomes a metacharacter between
- * { and }.
- *
- * type = NORMAL
- * We do \n and \t elimination, as well as \040 etc, plus
- * all other characters that we find quoted we unquote.
- * type = NORMAL when we do a backslash elimination on the
- * string argument to strsed.
- *
- * who tells us where to tell mem where to stick the new string.
- *
- * \{m,n\} syntax (see ed(1)) is not supported.
- *
- */
-
- static char *mem();
- char *new_str;
- int extra = 100;
- int seenlb = 0;
- register int i = 0;
- register int seenbs = 0;
- int first_half = 0;
-
- if (type == REPLACEMENT){
- if (!(new_str = mem(who, strlen(str) + 1 + extra))){
- return 0;
- }
- }
- else{
- new_str = str;
- }
-
- while (*str){
- if (seenbs){
- seenbs = 0;
- switch (*str){
- case '\\':{
- new_str[i++] = '\\';
- str++;
- break;
- }
-
- case '-':{
- if (seenlb){
- /* Keep it quoted. */
- new_str[i++] = '\\';
- }
- new_str[i++] = '-';
- str++;
- break;
- }
-
- case '}':{
- if (seenlb){
- /* Keep it quoted. */
- new_str[i++] = '\\';
- }
- new_str[i++] = '}';
- str++;
- break;
- }
-
- case 'n':{
- new_str[i++] = '\n';
- str++;
- break;
- }
-
- case 't':{
- new_str[i++] = '\t';
- str++;
- break;
- }
-
- case 's':{
- new_str[i++] = ' ';
- str++;
- break;
- }
-
- case 'r':{
- new_str[i++] = '\r';
- str++;
- break;
- }
-
- case 'f':{
- new_str[i++] = '\f';
- str++;
- break;
- }
-
- case 'b':{
- new_str[i++] = '\b';
- str++;
- break;
- }
-
- case 'v':{
- new_str[i++] = '\13';
- str++;
- break;
- }
-
- case 'z':{
- str++;
- break;
- }
-
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':{
-
- char val;
-
- /*
- * Three digit octal constant.
- *
- */
- if (*str >= '0' && *str <= '3' &&
- *(str + 1) >= '0' && *(str + 1) <= '7' &&
- *(str + 2) >= '0' && *(str + 2) <= '7'){
-
- val = (DIGIT(*str) << 6) +
- (DIGIT(*(str + 1)) << 3) +
- DIGIT(*(str + 2));
-
- if (!val){
- /*
- * NUL is not allowed.
- */
- return 0;
- }
-
- new_str[i++] = val;
- str += 3;
- break;
- }
-
- /*
- * One or two digit hex constant.
- * If two are there they will both be taken.
- * Use \z to split them up if this is not wanted.
- *
- */
- if (*str == '0' && (*(str + 1) == 'x' || *(str + 1) == 'X') && isxdigit(*(str + 2))){
- val = DIGIT(*(str + 2));
- if (isxdigit(*(str + 3))){
- val = (val << 4) + DIGIT(*(str + 3));
- str += 4;
- }
- else{
- str += 3;
- }
-
- if (!val){
- return 0;
- }
-
- new_str[i++] = val;
- break;
- }
-
- /*
- * Two or three decimal digits.
- * (One decimal digit is taken as either a register reference
- * or as a decimal digit if NORMAL is true below.)
- *
- */
- if (isdigit(*(str + 1))){
- val = DIGIT(*str) * 10 + DIGIT(*(str + 1));
- if (isdigit(*(str + 2))){
- val = 10 * val + DIGIT(*(str + 2));
- str += 3;
- }
- else{
- str += 2;
- }
-
- if (!val){
- return 0;
- }
-
- new_str[i++] = val;
- break;
- }
-
- /*
- * A register reference or else a single decimal digit if this
- * is a normal string..
- *
- * Emit \4 (etc) if we are not NORMAL (unless the digit is a 0
- * and we are processing an r.e. This is because \0 makes no
- * sense in an r.e., only in a replacement. If we do have \0
- * and it is an r.e. we return.)
- *
- */
- if (*str == '0' && type == REGEX){
- return 0;
- }
-
- if (type == NORMAL){
- if (!(val = DIGIT(*str))){
- return 0;
- }
- new_str[i++] = val;
- str++;
- }
- else{
- new_str[i++] = '\\';
- new_str[i++] = *str++;
- }
- break;
- }
-
- default:{
- if (type == REGEX){
- new_str[i++] = '\\';
- }
- new_str[i++] = *str++;
- break;
- }
- }
- }
- else{
- if (*str == '\\'){
- seenbs = 1;
- str++;
- }
- else if (type == REPLACEMENT && *str == '}'){
- if (*(str + 1) == '{' && first_half){
- new_str[i++] = *str++;
- new_str[i++] = *str++;
- first_half = 0;
- }
- else{
- seenlb = 0;
- new_str[i++] = *str++;
- }
- }
- else if (type == REPLACEMENT && !seenlb && *str == '{'){
- /*
- * Within { and }, \- should be left as such. So we can differentiate
- * between s/fred/\-/ and s/fred/{\-a-z}{+A-Z}
- *
- * We stick in a "\0" here in the case that \X has not just been
- * seen. (X = 0..9) Which is to say, {a-z}{A-Z} defaults to
- * \0{a-z}{A-Z}
- *
- */
-
- seenlb = 1;
- first_half = 1;
-
- if (i < 2 || new_str[i - 2] != '\\' || !(new_str[i - 1] >= '0' && new_str[i - 1] <= '9')){
- if ((extra -= 2) < 0){
- /* ran out of extra room. */
- return 0;
- }
- new_str[i++] = '\\';
- new_str[i++] = '0';
- }
- new_str[i++] = *str++;
- }
- else{
- /*
- * A normal char.
- *
- */
- new_str[i++] = *str++;
- }
- }
- }
-
- if (seenbs){
- /*
- * The final character was a '\'. Ignore it.
- *
- */
- }
-
- new_str[i] = '\0';
- return new_str;
- }
-
- static char *
- build_map(s, map)
- char *s;
- char *map;
- {
- /*
- * Produce a mapping table for the given transliteration.
- * We are passed something that looks like "{a-z}{A-Z}"
- * Look out for \ chars, these are used to quote } and -.
- *
- * Return a pointer to the char after the closing }.
- * We cannot clobber s.
- *
- * The building of maps is somewhat optimised.
- * If the string is the same as the last one we were
- * called with then we don't do anything. It would be better
- * to remember all the transliterations we have seen, in
- * order (because in a global substitution we will
- * apply them in the same order repeatedly) and then we
- * could do the minimum amount of building. This is a
- * compromise because it is a fairly safe bet that there will
- * not be more than one transliteration done.
- *
- */
-
- char *in;
- char *out;
- char *str;
- char *tmp;
- char c;
- static char *mem();
- static char nextch();
- int i = 0;
- int range_count = 0;
- int seenbs = 0;
- static char *last = 0;
- static int last_len;
-
- if (!s){
- return 0;
- }
-
- if (last && !strncmp(s, last, last_len)){
- /* Re-use the map. */
- return s + last_len;
- }
- else{
- /*
- * Make a copy of s in both 'last' and 'str'
- */
- int len = strlen(s) + 1;
- if (!(str = mem(MEM_MAP, len)) || !(last = mem(MEM_MAP_SAVE, len))){
- return 0;
- }
- str[0] = last[0] = '\0';
- strcat(str, s);
- strcat(last, s);
- }
-
- tmp = str + 1;
- in = str;
-
- while (*tmp){
- if (seenbs){
- if (*tmp == '-'){
- /*
- * Keep the \ before a - since this is the range
- * separating metacharacter. We don't keep } quoted,
- * we just put it in. Then it is passed as a normal
- * char (no longer a metachar) to nextch().
- *
- */
- str[i++] = '\\';
- }
- str[i++] = *tmp++;
- seenbs = 0;
- }
- else{
- if (*tmp == '\\'){
- seenbs = 1;
- tmp++;
- }
- else if (*tmp == '}'){
- if (!range_count){
- /* seen first range. */
- range_count = 1;
- str[i++] = '\0';
- tmp++;
- while (*tmp == ' ' || *tmp == '\t'){
- tmp++;
- }
- if (*tmp != '{'){
- return 0;
- }
- out = str + i;
- tmp++;
- }
- else{
- /* seen both ranges. */
- str[i++] = '\0';
- tmp++;
- range_count = 2;
- break;
- }
- }
- else{
- /* A plain defenceless character. */
- str[i++] = *tmp++;
- }
- }
- }
-
- if (range_count != 2){
- return 0;
- }
-
- last_len = tmp - str;
-
- /*
- * Now 'out' and 'in' both point to character ranges.
- * These will look something like "A-Z" but may be
- * more complicated and have {} and - in them elsewhere.
- *
- */
-
- for (i = 0; i < 1 << BYTEWIDTH; i++){
- map[i] = i;
- }
-
- /*
- * Ready the range expanding function.
- *
- */
- (void) nextch(in, 0);
- (void) nextch(out, 1);
-
- /*
- * For each char in 'in', assign it a value in
- * 'map' corresponding to the next char in 'out'.
- *
- */
-
- while ((c = nextch(0, 0))){
- map[c] = nextch(0, 1);
- }
-
- return tmp;
- }
-
/*
 * nextch -- expand a range like "a-z0237-9" one character at a time.
 *
 *   str  when non-null, the range string to start working on.  The
 *        pointer is remembered (not copied) and 1 is returned.
 *   who  which of the two remembered strings is meant, 0 or 1.
 *
 * With str == 0 the next character of string 'who' is returned.  When
 * string 0 is exhausted 0 is returned; string 1 keeps on returning its
 * last character.  0 is also returned if 'who' is out of range, if no
 * string has been set up, if the string starts with '-', or if a range
 * has no end.
 *
 * A backslash quotes the following character; this is how a '-' that
 * is not a range separator is written, including as the end of a range.
 *
 * We must handle strange things like
 *      {a-b-c-z}       = {a-z}
 * and  {z-l-a}         = {z-a}
 * and  {f-f-f-f-h}     = {f-h}
 * and  {a-z-f-h-y-d-b} = {a-b}
 *
 * and so on.  Remembering two strings makes the building of the
 * transliteration table in build_map() a trivial loop.
 */
static char
nextch(char *str, int who)
{
    static char *what[2] = {0, 0};
    static char last[2] = {0, 0};
    static int increment[2];
    static int pos[2];

    char *spec;

    if (who < 0 || who > 1) {
        return 0;
    }

    if (str) {
        /* Set up for this string. */
        what[who] = str;
        pos[who] = 0;
        return 1;
    }

    if (!what[who]) {
        return 0;
    }

    spec = what[who];

    if (!pos[who] && spec[0] == '-') {
        return 0;
    }

    switch (spec[pos[who]]) {

    case '-': {
        /*
         * We're in mid-range.  Step once more; when the end of the
         * range has been reached move past it.  The end character may
         * be quoted, in which case the backslash is skipped first.
         */
        int end = pos[who] + 1;

        last[who] += increment[who];
        if (spec[end] == '\\') {
            end++;
        }
        if (spec[end] == last[who]) {
            pos[who] = end + 1;
        }
        return last[who];
    }

    case '\0':
        /*
         * We've finished.  Keep on returning the last thing we saw
         * if who = 1.
         */
        return who ? last[1] : 0;

    case '\\':
        /* A quoted character: step over the backslash. */
        pos[who]++;
        if (!spec[pos[who]]) {
            /* Nothing was quoted; treat it as the end of the string. */
            return who ? last[1] : 0;
        }
        /* FALLTHROUGH */

    default:
        last[who] = spec[pos[who]++];

        /*
         * If we have reached a '-' then this is the start of a range.
         * Keep on moving forward until we see a sensible end of range
         * character, then set up increment so that we do the right
         * thing next time round.  We leave pos pointing at the '-'
         * sign.
         */
        while (spec[pos[who]] == '-') {
            int inc = 1;        /* distance from the '-' to the end char */
            int diff;

            if (spec[pos[who] + inc] == '\\') {
                inc++;
            }
            if (!spec[pos[who] + inc]) {
                /* A range with no end. */
                return 0;
            }
            if (spec[pos[who] + inc + 1] == '-') {
                /* a-f-c means a-c: ignore the f. */
                pos[who] += inc + 1;
                continue;
            }

            diff = spec[pos[who] + inc] - last[who];
            if (!diff) {
                /* f-f is just f: step over the '-' and the end char. */
                pos[who] += inc + 1;
                continue;
            }

            increment[who] = diff > 0 ? 1 : -1;
            break;
        }
        return last[who];
    }
}
-
- static char *
- mem(who, size)
- int who;
- int size;
- {
- /*
- * Get 'size' bytes of memeory one way or another.
- *
- * The 'mem_slots' array holds currently allocated hunks.
- * If we can use one that's already in use then do so, otherwise
- * try and find a hunk not in use somewhere else in the table.
- * As a last resort call malloc. All a bit specialised and
- * not too clear. Seems to works fine though.
- */
-
- static void mem_save();
-
- if (who < 0 || who >= MEM_SLOTS){
- return 0;
- }
-
- if (mem_slots[who].used){
- /*
- * There is already something here. Either move/free it or
- * return it if it is already big enough to hold this request.
- */
- if (mem_slots[who].size >= size){
- /* It is already big enough. */
- return mem_slots[who].s;
- }
- else{
- mem_save(who);
- }
- }
- else{
- /*
- * The slot was not in use. Check to see if there is space
- * allocated here already that we can use. If there is and
- * we can, use it, if there is and it's not big enough try to
- * save it. if there isn't then try to find it in another free slot,
- * otherwise don't worry, the malloc below will get us some.
- */
- if (mem_slots[who].s && mem_slots[who].size >= size){
- /* We'll take it. */
- mem_slots[who].used = 1;
- return mem_slots[who].s;
- }
-
- if (mem_slots[who].s){
- mem_save(who);
- }
- else{
- static int mem_find();
- int x = mem_find(size);
- if (x != -1){
- mem_slots[who].s = mem_slots[x].s;
- mem_slots[who].size = mem_slots[x].size;
- mem_slots[who].used = 1;
- mem_slots[x].s = (char *)0;
- return mem_slots[who].s;
- }
- }
- }
-
- /*
- * Have to use malloc 8-(
- */
-
- if (!(mem_slots[who].s = (char *) malloc((unsigned)size))){
- return 0;
- }
- mem_slots[who].size = size;
- mem_slots[who].used = 1;
-
- return mem_slots[who].s;
- }
-
- static int
- mem_find(size)
- int size;
- {
- /*
- * See if we can find an unused but allocated slot with 'size'
- * (or more) space available. Return the index, or -1 if not.
- */
-
- register int i;
-
- for (i = 0; i < MEM_SLOTS; i++){
- if (!mem_slots[i].used && mem_slots[i].s && mem_slots[i].size >= size){
- return i;
- }
- }
- return -1;
- }
-
- static void
- mem_save(x)
- int x;
- {
- /*
- * There is some memory in mem_slots[x] and we try to save it rather
- * than free it. In order we try to
- *
- * 1) put it in an unused slot that has no allocation.
- * 2) put it in an unused slot that has an allocation smaller than x's
- * 3) free it since there are no free slots and all the full ones are bigger.
- *
- */
-
- register int i;
- register int saved = 0;
-
- /*
- * First we try to find somewhere unused and with no present allocation.
- */
- for (i = 0; i < MEM_SLOTS; i++){
- if (!mem_slots[i].used && !mem_slots[i].s){
- saved = 1;
- mem_slots[i].s = mem_slots[x].s;
- mem_slots[i].size = mem_slots[x].size;
- mem_slots[i].used = 0;
- break;
- }
- }
-
- /*
- * No luck yet. Try for a place that is not being used but which has
- * space allocated, and which is smaller than us (and all other such spots).
- * Pick on the smallest, yeah.
- */
- if (!saved){
- register int small = -1;
- register int small_val = 1000000;
- for (i = 0; i < MEM_SLOTS; i++){
- if (!mem_slots[i].used && mem_slots[i].size < mem_slots[x].size && mem_slots[i].size < small_val){
- small_val = mem_slots[i].size;
- small = i;
- }
- }
-
- if (small != -1){
- saved = 1;
- /* We got one, now clobber it... */
- free(mem_slots[small].s);
- /* and move on in. */
- mem_slots[small].s = mem_slots[x].s;
- mem_slots[small].size = mem_slots[x].size;
- mem_slots[small].used = 0;
- }
- }
-
- if (!saved){
- /* Have to toss it away. */
- free(mem_slots[x].s);
- }
- }
-
- static void
- mem_init()
- {
- /*
- * Clear all the memory slots.
- */
-
- register int i;
-
- for (i = 0; i < MEM_SLOTS; i++){
- mem_slots[i].s = (char *)0;
- mem_slots[i].used = 0;
- }
- }
-
- static void
- mem_free(except)
- char *except;
- {
- /*
- * "Clear out" all the memory slots. Actually we do no freeing since
- * we may well be called again. We just mark the slots as unused. Next
- * time round they might be useful - the addresses and sizes are still there.
- *
- * For the slot (if any) whose address is 'except', we actually set the
- * address to 0. This is done because we are called ONLY from the macro
- * RETURN() in strsed() and we intend to return the value in 'except'.
- * Once this is done, strsed should (in theory) have no knowledge at all
- * of the address it passed back last time. That way we won't clobber it
- * and cause all sorts of nasty problems.
- */
-
- register int i;
-
- for (i = 0; i < MEM_SLOTS; i++){
- mem_slots[i].used = 0;
- if (mem_slots[i].s == except){
- mem_slots[i].s = (char *)0;
- mem_slots[i].size = 0;
- }
- }
- }
-
-