home *** CD-ROM | disk | FTP | other *** search
- /*
- * Date: March 17 1985 Author: Arch D. Robison
- * Dept. of Computer Science
- * University of Illinois
- * Urbana-Champaign
- *
- * USENET: robison@uiucdcs
- *
- * Hash8 copies stdin to stdout, while replacing certain identifiers.
- * lint can be converted to accept long identifiers by hacking in hash8
- * between /lib/cpp and /usr/lib/lint/lint appropriately.
- *
- * There are three ways to call hash8:
- *
- * hash8 encode table
- * Map long identifiers and those beginning with Q
- * into short identifiers Q%d
- *
- * hash8 decode table
- * Map short identifiers Q%d into their long equivalents
- *
- * hash8 _decode table
- * Map short identifiers _Q%d into their long equivalents
- * This is used to decode the linker's error messages
- *
- * The 'table' argument is the file name for the identifier map.
- * The 'encode' calls will either create or expand the table.
- *
- * Typically, the encode option is used to preprocess input to the compiler
- * or lint, and the decode option is used to decode error messages from
- * the compiler.
- *
- * The constant HASHBITS may need to be changed. It is the base two
- * log of the number of distinct long identifiers which may be found.
- * E.g. the value of 12 allows for 4096 long identifiers.
- *
- * Hash8 has not been thoroughly tested, though it can translate itself
- * correctly. Note that its own source contains all sorts of quotes within quotes.
- */
- #include <stdio.h>
- #include <ctype.h>
-
- /*
- * Reserved is an array of words which we don't want modified, such
- * as the key word "register", or system functions longer than 7 characters.
- * Feel free to add any others, though remember to clear your hash table
- * files after recompiling.
- */
- char **Reserved = NULL; /* reserved words given with -r; allocated and filled by main */
- int Res_max = 0; /* number of pointer slots allocated for Reserved */
- int Res_count = 0; /* number of slots of Reserved actually in use */
- char *Def_reserved[] = { /* built-in words; ReadTab enters each one as W_RESERVED */
- "continue",
- "register",
- "unsigned"
- };
-
- extern char *malloc (), *strcpy (); /* old-style declarations; only <stdio.h> and <ctype.h> are included above */
-
- #define SIGCHARS 7 /* significant characters in identifier */
- #define HASHBITS 12 /* hash table address size */
- #define HASHLIMIT (1<<HASHBITS) /* number of slots in the hash table */
- #define HASHMASK (HASHLIMIT-1) /* AND-mask that wraps an index into the table */
- #define PAGESIZE 4096 /* Memory allocation pagesize (string storage is malloc'ed in chunks of this size) */
- #define MAXLINE 1024 /* Maximum length of a source line allowed (size of the line and table-entry buffers) */
-
- #define W_SHORTEN 0 /* Identifier classes: this one is emitted as Q<index> */
- #define W_NORMAL 1 /* identifier is copied through unchanged */
- #define W_RESERVED 2 /* reserved word: copied unchanged and never written to the table file */
- #define W_Valid(N) ((N) >= 0 && (N) <= 2) /* true if N is one of the three classes above */
-
- /*
- * HashTab
- *
- * The identifier map is a hash table. The table uses open addressing
- * with linear probing for collision resolution. Identifiers in the
- * table are mapped into Qxxx, where xxx is the table address in decimal.
- *
- * The hash table is effectively declared:
- *
- * char *HashTab[HASHLIMIT];
- *
- * though the memory allocation is done with malloc. Each empty hash table
- * item is NULL. Full entries point to a class byte followed by the
- * NUL-terminated identifier. The class byte is one of:
- *
- * W_NORMAL - don't modify this identifier
- * W_SHORTEN - shorten this identifier
- * W_RESERVED - reserved word
- */
- char **HashTab; /* HASHLIMIT slots, allocated in main */
- int HashSize = 0; /* Number of elements in hash table */
- int NewTab; /* Flag which is set to true if hash table is modified */
-
- char *StrFree; /* Pointer to base of free string area */
- int StrLeft = 0; /* Number of characters left in free string area */
-
- /*
- * Insert
- *
- * Insert identifier in hash table
- *
- * In
- * k = index into hash table
- * S = identifier
- * Class = class of identifier (W_NORMAL,W_SHORTEN,W_RESERVED)
- */
- void Insert (k,S,Class)
- int k;
- char *S;
- int Class;
- {
- register int L;
-
- NewTab = 1;
- HashSize++;
- if ((StrLeft -= (L=2+strlen (S))) < 0)
- StrFree = malloc (StrLeft=PAGESIZE);
- *(HashTab[k] = StrFree) = Class;
- strcpy (StrFree+1, S);
- StrFree += L;
- StrLeft -= L;
- }
-
- /*
- * LookUp
- *
- * Look up an identifer in the identifier hash table.
- * If not found, then insert it in the table.
- *
- * The hashing uses open addressing with linear probing.
- * The algorithm is a blue-light special, a better hash function
- * (double hashing?) should be used.
- *
- * In
- * S = identifier (must be at least seven characters if Duplicate == 0)
- * Class = identifier class (W_NORMAL,W_SHORTEN,W_RESERVED)
- * Out
- * result = index into hash table
- */
- int LookUp (S,Class)
- char *S;
- int Class;
- {
- register int k,j;
- register char *T;
-
- if (Class != W_SHORTEN) {
-
- /* Hash first seven characters of identifier */
- for (j=0,k=0,T=S; j<SIGCHARS; j++, k+= *T++) k = (k<<1) + k;
-
- /* 7-character search for identifier in table */
- for (j=k; HashTab[j&=HASHMASK] != NULL; j++)
- if (!strncmp (HashTab[j]+1,S,SIGCHARS))
- if (!strcmp (HashTab[j]+1,S)) return j;
- else {
- Class = W_SHORTEN;
- break;
- }
- /* The following test and assignment cause identifiers to be
- * hashed even if they are the first long identifier. This
- * protects from truncation by the compiler. Othewise, when
- * you run adb you have to know which long id came first.
- * Geoff Kuenning 11/8/86
- */
- if (Class == W_NORMAL && strlen (S) > SIGCHARS)
- Class = W_SHORTEN;
- }
-
- if (Class == W_SHORTEN) {
- /*
- * There is another identifier with the same 7-character prefix.
- * Hash the complete identifier and look it up in the table.
- */
- for (j=k; *T; j+= *T++) j = (j<<1) + j;
-
- /* all characters search for identifier in table */
- for (; HashTab[j&=HASHMASK] != NULL; j++)
- if (!strcmp (HashTab[j]+1,S)) return j;
- }
-
- /* Identifier was not found - insert it in hash table */
- Insert (j,S,Class);
- if (HashSize == HASHLIMIT)
- fprintf (stderr,"hash8: table overflow\n"), exit (1);
- return j;
- }
-
- #define C_CODE 0 /* Defines for translator states: ordinary program text */
- #define S_QUOTE 1 /* inside a '...' literal */
- #define D_QUOTE 2 /* inside a "..." literal */
- #define COMMENT 3 /* inside a comment */
-
- #define ENCODE 0 /* Mode values for translator: replace identifiers by Q<index> */
- #define DECODE 1 /* replace Q<index> by the identifier stored in the table */
- #define _DECODE 2 /* as DECODE, but only for names that begin with an underscore */
-
- /*
- * Translate
- *
- * Translate input stream with identifier map.
- *
- * This should have been written with lex.
- */
- Translate (Mode)
- int Mode;
- {
- register char C, *P, *Q;
- char S[MAXLINE];
- int k, state=C_CODE, IsQ;
-
- while (NULL != fgets (S,MAXLINE,stdin))
- for (P=S; C= *P; )
- switch (state) {
- case COMMENT:
- putchar (*P++);
- if (C == '*' && *P == '/') state=C_CODE, putchar (*P++);
- break;
- case S_QUOTE:
- case D_QUOTE:
- putchar (*P++);
- switch (C) {
- case '\'': if (state == S_QUOTE) state = C_CODE; break;
- case '"' : if (state == D_QUOTE) state = C_CODE; break;
- case '\\': putchar (*P++); break;
- default: break;
- }
- break;
-
- case C_CODE:
- if (isalpha (C) || C=='_') {
- /* Beginning of identifier */
- for (Q=P; C= *Q, isalnum(C)||C=='_'; Q++);
- *Q = '\0';
- switch (Mode) {
-
- case ENCODE: /* We are encoding C source */
- IsQ = *P=='Q' && isdigit (P[1]);
- if (Q-P <= SIGCHARS && !IsQ)
- fputs (P,stdout);
- else {
- k = LookUp (P,IsQ ? W_SHORTEN : W_NORMAL);
- if (*HashTab[k] != W_SHORTEN) fputs (P,stdout);
- else printf ("Q%d",k);
- }
- break;
-
- case _DECODE: /* We are decoding linker messages */
- if (*P != '_') {
- fputs (P,stdout);
- break;
- }
- putchar (*P++);
- /* continue on down to case DECODE */
-
- case DECODE: /* We are decoding error message */
- if (*P=='Q' && isdigit (P[1])) {
- k=atoi(P+1);
- if (!(k &~HASHMASK) && HashTab[k]!=NULL)
- P = HashTab[k] + 1;
- }
- fputs (P,stdout);
- break;
- }
- *(P=Q) = C;
- } else if (isdigit (C)) {
- /* Skip number to avoid changing long numbers */
- while (isalnum(*P)) putchar (*P++);
- } else {
- putchar (*P++);
- switch (C) {
- default: break;
- case '\'': state = S_QUOTE; break;
- case '"' : state = D_QUOTE; break;
- case '/' : if (*P != '*') continue;
- state=COMMENT;
- case '\\': putchar (*P++); break;
- }
- }
- }
- }
-
- /*
- * ReadTab
- *
- * Read the hash table.
- *
- * In
- * Name = name of hash table file
- */
- ReadTab (Name)
- char *Name;
- {
- FILE *Table;
- char S[MAXLINE];
- int k,L,Class;
-
- /* First record all words we don't want mangled in hash table */
- for (k = 0; k < sizeof (Def_reserved) / sizeof (char *); k++)
- LookUp (Def_reserved[k], W_RESERVED);
- for (k = 0; k < Res_count; k++)
- LookUp (Reserved[k],W_RESERVED);
-
- if (NULL == (Table = fopen (Name,"r"))) return;
- while (EOF != (L = fscanf (Table,"%d %d %s",&k,&Class,S)))
- if (L != 3 || k &~HASHMASK || !W_Valid (Class))
- fprintf (stderr,"hash8 table error\n"),
- exit (1);
- else Insert (k,S,Class);
- fclose (Table);
- NewTab = 0;
- }
-
- /*
- * WriteTab
- *
- * Write out the hash table
- *
- * In
- * Name = name of hash table file
- */
- WriteTab (Name)
- char *Name;
- {
- FILE *Table;
- int i;
-
- if (NULL == (Table = fopen (Name,"w")))
- fprintf (stderr,"hash8: can't open hash table file '%s'\n",Name),
- exit (1);
- for (i=0; i<HASHLIMIT; i++)
- if (HashTab[i] != NULL && *HashTab[i] != W_RESERVED)
- fprintf (Table,"%d %d %s\n",i,*HashTab[i],HashTab[i]+1);
- fclose (Table);
- }
-
- main (argc,argv)
- int argc; char *argv[];
- {
- register char **h;
- int Mode;
-
- /*
- * Set up the reserved-word list.
- */
- while (argc > 3 && argv[1][0] == '-' && argv[1][1] == 'r') {
- argv[1] += 2;
- if (argv[1][0] == '\0') {
- argc--;
- argv++;
- }
- if (Res_count == Res_max) {
- Res_max += 5;
- if (Reserved == NULL)
- Reserved = (char **) malloc (5 * sizeof (char *));
- else
- Reserved = (char **)
- realloc ((char *) Reserved, Res_max * sizeof (char *));
- }
- Reserved[Res_count] = argv[1];
- Res_count++;
- argc--;
- argv++;
- }
- if (argc != 3) {
- fprintf (stderr,
- "usage: hash8 [-r reserved] ... (encode|[_]decode) table\n");
- exit (1);
- }
-
- /*
- * If either stdin or stdout is a tty, set both unbuffered, for use
- * in pipes.
- * Geoff Kuenning, 11/8/86
- */
- if (isatty (fileno (stdin)) || isatty (fileno (stdout))) {
- setbuf (stdin, NULL);
- setbuf (stdout, NULL);
- }
- HashTab = (char **) malloc ((sizeof (char*)) * (HASHLIMIT));
- for (h = HashTab+HASHLIMIT; --h > HashTab; ) *h = NULL;
-
- ReadTab(argv[2]);
-
- if (!strcmp (argv[1],"encode")) Mode = ENCODE;
- else if (!strcmp (argv[1],"decode")) Mode = DECODE;
- else if (!strcmp (argv[1],"_decode")) Mode = _DECODE;
- else
- fprintf (stderr,"hash8: second arg must be 'encode' or 'decode'\n"),
- exit (1);
-
- Translate (Mode);
- if (NewTab) WriteTab(argv[2]);
- }
-
-