home *** CD-ROM | disk | FTP | other *** search
- /* ------------------------------------------------------------------------ *
- * K R 2 A N S I . C *
- * ------------------------------------------------------------------------ *
- * 9/23/91
- * Author: Harry Karayiannis
- * ______________________E-MAIL:_____________________
- * INTERnet: | BITnet:
- * harryk@bucsf.bu.edu | cscrzcc@buacca.bu.edu
- * --------------------------------------------------
- *
- * Purpose: Read a C-code source file written in Kernighan-Ritchie's
- * programming style, and produce ANSI prototypes for all
- * functions. (Hint: using output redirection you can create
- * a file with prototypes of all the functions in your K&R C
- * source-file. The program is automagically putting the word
- * "extern" in front of the prototype-line, so you can #include
- * the file in your current source code and compile it with an
- * ANSI compiler).
- *
- * Usage: kr2ansi [-p] [ [-r file1] | [usr_def1 ... usr_defn] ] file2
- * -p : show parameters
- * -r file1 : read user-defined types from 'file1'
- * usr_def1 ... usr_defn :
- * user-defined types present in 'file2'
- * file2 : the file to read C-code from
- *
- * Notes: This program is useful for people (like myself) who like
- * K&R programming style, but want to take advantage of the
- * parameter-checking, during compilation, typically done by
- * ANSI compilers (e.g. gcc, Turbo-C, Prospero-C, etc).
- * Others simply want their K&R code to compile under an ANSI
- * compiler without watching all those "irritating" warnings
- * saying: "Function call without prototyping".
- * For either case, kr2ansi can prove very handy.....
- *
- * Caveats: The program is not bug-free. I tried to make it work with
- * files that follow the programming style presented in
- * "The C Programming Language (1st Edition)", by Kernighan
- * and Ritchie.
- * In other words, it expects K&R function declarations to
- * be in the following form:
- * (NOTE: I write comments with backslashes '\' cause
- * MWC does not parse nested comments)
- *
- * FUNCTION-TYPE FUNCTION-NAME(P1,P2,...,PN) \* comments *\
- * \* also you can have ... *\
- * \* ...more comments here *\
- * PARAMETER-TYPE P1,P2; \* comments *\
- * \* ... or here *\
- * PARAMETER-TYPE P3; \* more comments *\
- * PARAMETER-TYPE P4,...,PN; \* yet more comments *\
- * { \* ... or even here *\
- * function-body
- * }
- *
- * The program gets confused when it finds 1) comments inside the
- * parameter list, 2) semicolons placed after comments, 3) comments
- * that open in one line and close in a different one, 4) curly
- * braces '{' that do not appear as the first char in a separate
- * line, and perhaps in some more cases I've not figured out yet.
- * But you shouldn't use such a bad programming style anyways 8*)
- * (Hint: I think that unix-like preprocessors (like "cpp" in
- * MWc, gcc, etc) take a flag that causes comments to be
- * removed from the source-file...check it out)
- *
- * However, it is almost guaranteed to work with files that follow
- * K & R, or Rochkind's programming style (except in one case...)
- *
- *
- * Bugs: kr2ansi fails to find integer functions that do not include
- * the data-type in the beginning of the function-declaration:
- * e.g.
- * main(argc,argv)
- * int argc;
- * char *argv[];
- * {
- * ...
- * }
- *
- * The reason is that the program identifies a K&R function header
- * by checking the first word of the line, which *must* be a valid
- * data-type (standard or user-defined). If you come up with a bet-
- * ter algorithm please let me know. (Please don't tell me to write
- * a complete C parser, cause I won't)
- */
-
-
-
- #include <stdio.h> /* don't tell me you don't know this one */
- #include "kr2ansi.h" /* constants, macros & user-defined types */
-
- char *progname; /* the name of the program */
- char *data_types[]={ DATA_TYPES }; /* array of valid data-types */
- unsigned options = NONE; /* bit map with command-line options */
-
- /* ======================================================================== *
- * main
- * ======================================================================== */
-
- int main(argc, argv)
- int argc;
- char *argv[];
- {
- char fname_in[MAXNAME]; /* name of input file */
- GLOBAL char *data_types[]; /* array of valid data-types */
-
- void kr2ansi();
- BOOLEAN parse(); /* return FALSE on command_line error */
- extern void usage(); /* in file: ERROR.C */
-
-
-
- progname = argv[0]; /* set the program's name */
- if ( !parse(argc, argv, data_types, fname_in) )
- usage("[-p] [ [-r file1] | [usr_def1 ... usr_def10] ] file2");
- else
- kr2ansi(fname_in);
-
- return(0);
- }
-
-
- /* ======================================================================== *
- * parse
- * ======================================================================== */
-
- BOOLEAN parse(argc, argv, data_types, fname_in)
- int argc; /* number of args in command-line */
- char *argv[]; /* the arguments themselves */
- char *data_types[]; /* array of valid data-types */
- char *fname_in; /* name of the input file */
- {
- /* Check the validity of the command line,
- * make valid specified user-defined data-types,
- * and assign the specified filename to : fname_in
- */
-
- register int i,u; /* integer counters */
- void set_dtypes(); /* read user-def. types from a file */
- GLOBAL unsigned options; /* bit map with command-line options */
- STD_CLIB char *strcpy(); /* part of the standard C-library */
-
-
- if (argc < 2) /* too few arguments */
- return(FALSE);
-
- i=1; /* you can add your own options in this loop */
- while ( argv[i][0] == '-' )
- {
- if ( !strcmp(argv[i],"-p") ) /* option -p: */
- { /* show parameters */
- if (i == argc-1)
- return(FALSE);
- options |= SHOW_PARA;
- }
- else if ( !strcmp(argv[i],"-r") ) /* option -r: */
- { /* read user-defined types from file */
- if (i != argc-3)
- return(FALSE);
- options |= RD_TYPES;
- set_dtypes(argv[i+1], data_types);
- }
- else if ( !strcmp(argv[i],"") )
- options |= UNUSED4; /* unused slot */
- else if ( !strcmp(argv[i],"") )
- options |= UNUSED5; /* unused slot */
- else if ( !strcmp(argv[i],"") )
- options |= UNUSED6; /* unused slot */
- else if ( !strcmp(argv[i],"") )
- options |= UNUSED7; /* unused slot */
- else if ( !strcmp(argv[i],"") )
- options |= UNUSED8; /* unused slot */
- else /* invalid option */
- return(FALSE); /* return FALSE */
- i++;
- }
-
- if ( !(options & RD_TYPES) ) /* get user-def types from command-line */
- for (u=i; (u<=N_DT_USR && u<argc-1); u++)
- strcpy(data_types[DT_USR1+u-i], argv[u]);
-
- strcpy(fname_in, argv[argc-1]); /* get name of the input-file */
-
- return(TRUE);
- }
-
-
-
- /* ======================================================================== *
- * kr2ansi
- * ======================================================================== */
-
- void kr2ansi(fname)
- char *fname; /* name of the input file */
- {
- /* This function opens the input file, and reads all
- * the lines, one at a time. If a line is a valid K&R
- * function decleration (header) it calls make_ansi()
- * to convert the line into an ANSI function decleration
- */
-
- FILE *fp_in; /* used for reading the file: fname */
- char ln[MAXLINE]; /* next line in the file: fname */
-
- BOOLEAN is_KR_header(); /* TRUE if ln is a valid K&R function header */
- void make_ansi(); /* make ln an ANSI function header */
- extern void fatal(); /* in file: ERROR.C */
- STD_CLIB char *fgets(); /* part of the standard C-library */
-
-
-
- fp_in = fopen(fname, "r");
- demand(fp_in != NULL, "file2 does not exist");
- while (fgets(ln, MAXLINE, fp_in) != NULL)
- {
- if ( is_KR_header(ln) )
- {
- make_ansi(fp_in, ln);
- printf("%s",ln);
- }
- }
- fclose(fp_in);
- }
-
-
-
- /* ======================================================================== *
- * set_dtypes()
- * ======================================================================== */
-
- void set_dtypes(fname, data_types)
- char *fname; /* name of the file to read user-def types from */
- char *data_types[]; /* array with valid data-types */
- {
- /*
- * set_dtypes() reads up to N_DT_USR words from file 'fname' and assigns
- * them to the array 'data_types[]'. Each word can be up to DT_MAXWORD
- * characters long (words longer than DT_MAXWORD are truncated).
- */
-
- FILE *fp; /* used for reading the file: fname */
- int c;
- BOOLEAN onword; /* TRUE if we are on a word */
- register int ccount; /* counter for word's letters (up to DT_MAXWORD) */
- register int wcount; /* counter for words (up to N_DT_USR) */
- extern void fatal(); /* in file: ERROR.C */
- STD_CLIB int fgetc(); /* part of the standard C-library */
-
-
- fp = fopen(fname, "r");
- demand( fp != NULL, "file1 does not exist");
- while ((c=fgetc(fp)) != EOF && IS_BLANK(c)); /* skip leading blanks */
- if (c == EOF) /* if file is empty, exit */
- {
- fclose(fp);
- fatal("file1 is empty");
- }
-
- /*
- * IMPORTANT:
- * at this point we know for a fact that 'c' is
- * the first letter of the first word in the file
- */
-
- wcount = DT_USR1; /* the first slot for user-def types */
- ccount = 0; /* initialize char-counter */
- data_types[wcount][ccount++] = c; /* set the first character */
- onword = TRUE; /* we are on the first word */
- while ( wcount < DT_USR1+N_DT_USR && (c=fgetc(fp)) != EOF )
- {
- if ( IS_BLANK(c) ) /* we are on a blank character */
- {
- if (onword) /* if it immediately follows a word */
- { /* we should take care of some stuff */
- onword = FALSE; /* we're not on a word anymore */
- data_types[wcount][ccount] = '\0'; /* terminate previous word */
- wcount++; /* increase word-counter */
- ccount = 0; /* reset char-counter */
- }
- }
- else /* we are on a letter... */
- {
- onword = TRUE; /* ..thus we are on a word */
- if (ccount < DT_MAXWORD-1) /* no more DT_MAXWORD chars allowed */
- data_types[wcount][ccount++] = c; /* append 'c' in current data-type */
- }
- }
- fclose(fp);
- }
-
-
- /* ======================================================================== *
- * is_KR_header
- * ======================================================================== */
-
- BOOLEAN is_KR_header(header)
- char *header; /* potential K&R func. header */
- {
- /* This function recognises a "valid" K&R func. header line
- * by testing three basic conditions (the order is significant):
- *
- * 1. the first word _must_ be a valid data-type.
- *
- * 2. the line _must not_ contain a semicolon.
- * (BUG: we screw up if line contains ';' inside a comment)
- *
- * 3. a) after we remove potentially commented characters,
- * b) the last non-blank character _must_ be a closing parenthesis: ')'
- *
- * If any of the above conditions fail then the function returns FALSE,
- * otherwise the line is considered to be valid, it is modified a little
- * (see below), and the function returns TRUE.
- *
- * If the function concludes that the line is a valid K&R func-header
- * it modifies the line in order to bring it in the form expected by
- * the function make_ansi(): a) removes any white spaces and potential
- * comments after the closing parenthesis, and b) appends a semicolon
- * and a newline character.
- * (Actually potential comments are _always_ removed)
- */
-
- char word[MAXWORD]; /* the first word of the line */
- char *get_1st_word(); /* returns the 1st word in a string */
- register int i; /* just a counter */
- BOOLEAN valid(); /* TRUE if the 1st param. is a valid data-type */
- STD_CLIB int strlen(); /* part of the standard C-library */
- STD_CLIB char *strcpy(); /* part of the standard C-library */
-
-
-
- strcpy(word,get_1st_word(header));/* C o n d i t i o n # 1: */
- if ( !valid(word) ) /* 1st word must be a valid data-type */
- return(FALSE);
-
- i = strlen(header); /* C o n d i t i o n # 2: */
- while (i > 0){ /* no ';' allowed in the header */
- if ( header[i] == ';' )
- return(FALSE);
- i--;
- }
- /* C o n d i t i o n # 3: */
- i = 0; /* a. remove potential comments */
- while (header[i] != '\0' && header[i] != '/') i++;
- header[i] = '\0';
- i = strlen(header)-1; /* b. last non-blank char must be ')' */
- while ( IS_BLANK(header[i]) && i>0 ) i--;
- if ( header[i] != ')' )
- return(FALSE);
- /* M o d i f y L i n e: */
- header[++i] = ';'; /* append a semicolon, and */
- header[++i] = '\n'; /* a newline character */
- header[++i] = '\0';
-
- return(TRUE);
- }
-
-
-
- /* ======================================================================== *
- * get_1st_word
- * ======================================================================== */
-
- char *get_1st_word(line)
- char *line;
- {
- /* Return the first word in the parameter string
- * If the word is longer than MAXWORD,
- * the function returns W_TOO_LONG
- * NOTE:
- * W_TOO_LONG should contain _at most_ MAXWORD chars in the quotes
- */
-
- char word[MAXLINE]; /* reserve space for MAXLINE chars (see below) */
- register int i; /* just a counter */
- STD_CLIB char *strcpy(); /* part of the standard C-library */
-
-
-
- i=0; /* skip leading blanks and... */
- while (line[i] != '\0' && IS_BLANK(line[i]))
- i++;
- strcpy(word, &line[i]); /* ...put result in: 'word' */
- /* NOTE: 'word' has room for MAXLINE chars */
-
- i=0; /* keep only the 1st word */
- while (word[i] != '\0' && !IS_BLANK(word[i]))
- i++;
- word[i] = '\0';
- /* return 'word' (or W_TOO_LONG) */
- return( (strlen(word) > MAXWORD) ? W_TOO_LONG : word );
- }
-
-
-
-
- /* ======================================================================== *
- * valid
- * ======================================================================== */
-
- BOOLEAN valid(word)
- char *word;
- {
- /* Compare the string 'word' against all
- * strings listed in the array 'data_types'.
- * Return TRUE on the first match, or FALSE
- * if 'word' is not listed in 'data_types'.
- */
-
- register int i; /* just a counter */
- GLOBAL char *data_types[]; /* array with valid data-types */
- STD_CLIB int strcmp(); /* part of the standard C-library */
-
-
- for (i=DT_STD1; i<=(LAST_DT_STD+N_DT_USR); i++)
- if ( !strcmp(word, data_types[i]) )
- return(TRUE);
-
- return(FALSE);
- }
-
-
-
-
-
- /* ======================================================================== *
- * make_ansi
- * ======================================================================== */
-
-
- void make_ansi(fp, proto)
- FILE *fp; /* pointer to file: fname_in */
- char *proto; /* ANSI-prototyping to be produced */
- {
- /* This function takes the string 'proto' and converts it to
- * a valid ANSI function prototype:
- *
- * First it inserts the word "extern" into the string 'line',
- * just in front of the function's data-type.
- * Then it checks if the parameter-list is empty, via the
- * function: has_param(), and removes all the chars after the
- * opening parenthesis (i.e. 'proto' becomes:
- * "extern fn_type fn_name(" ).
- * Now, if the parameter list was empty, the string: "void);\n"
- * is appended to 'proto' and the function returns. Otherwise it
- * is expecting to find the parameter-declarations between the
- * function-declaration and the first '{' character. For each
- * such line, it removes potential comments and checks the first
- * word (parameter-type) against all valid data-types.
- * If everything is ok, the parameter-type along with the
- * parameter itself are appended to 'proto' (via the function:
- * append_param() ). Otherwise the parameter-type is "assumed"
- * to be invalid and the string constant: UNDEFINED_DATA_TYPE
- * is used instead.
- * BUG: The above algorithm fails when a comment is opened in
- * one line and is closed in a different line.
- * The result is that in the output line commented words
- * will appear as a parameters of type UNDEFINED_DATA_TYPE.
- */
-
-
- char *cp;
- char par_decl[MAXLINE]; /* parameter-declaration line */
- char par_type[MAXWORD]; /* parameter's data-type */
- char *get_1st_word();
- BOOLEAN no_err = TRUE;
- BOOLEAN append_param(); /* see below */
- BOOLEAN has_params(); /* see below */
- BOOLEAN valid();
- STD_CLIB int strlen(); /* part of the standard C-library */
- STD_CLIB char *strcpy(); /* part of the standard C-library */
- STD_CLIB char *strcat(); /* part of the standard C-library */
- STD_CLIB char *strchr(); /* part of the standard C-library */
-
-
-
- strcpy(par_decl, "extern "); /* put "extern" in the front */
- strcat(par_decl, proto); /* (note: here we use 'par_decl' */
- strcpy(proto, par_decl); /* as temporary string storage) */
-
- if ( !has_params(proto) ) /* check & remove parameter-list */
- {
- strcat(proto, "void);\n");
- return;
- }
- /* get next 'par_decl' */
- while ( fgets(par_decl, MAXLINE, fp) != NULL && no_err)
- {
- char *ptr;
-
- if ( ptr = strchr(par_decl,'/') ) /* remove potential comments */
- *ptr = '\0';
-
- strcpy(par_type, get_1st_word(par_decl)); /* get the parameter-type */
-
- if (par_type[0] == '{') /* if we hit a '{' we stop */
- break;
-
- if ( !valid(par_type) ) /* check for valid 'par_type' */
- strcpy(par_type, UNDEFINED_DATA_TYPE);
- /* append ANSI parameter-list */
- no_err = append_param(proto, par_type, par_decl);
- }
-
-
- /* The function append_param() converts 'proto' to the following form: */
- /* "extern fn_type fn_name(ptype p1, ptype p2, ..., ptype pn, " */
- /* So we need to fix 'proto''s tail by 1. erasing the last two chars */
- /* (namely ' ' and ',') and 2. appending the string: ");\n" */
-
- cp = proto + (strlen(proto)-2); /* go two chars back */
- *cp = ')';
- *(cp+1) = ';';
- *(cp+2) = '\n';
- *(cp+3) = '\0';
- }
-
-
- /* ======================================================================== *
- * has_params
- * ======================================================================== */
-
- BOOLEAN has_params(header)
- char *header; /* the function-header line */
- {
- /* This function checks if the parameter-list is empty,
- * and removes all chars after the opening parenthesis.
- * Its task is to modify 'header' and to return TRUE if
- * the parameter-list was empty.
- * NOTICE that the parameter-list is considered empty
- * when either the char ')' comes right after char '('
- * or it consists of white(BLANK) characters.
- */
-
- char *cp1, *cp2; /* temporary pointers */
- BOOLEAN param_yes = FALSE; /* what the function returns */
- STD_CLIB char *strchr(); /* part of the standard C-library */
-
-
-
- cp1 = cp2 = strchr(header,'('); /* save the start of param-list in cp2 */
-
- if ( *(cp1+1) == ')' ) /* if ')' comes right after '(' */
- { /* Remove all charactes coming */
- *(cp1+1) = '\0'; /* right after '(', and... */
- return(FALSE); /* ...return FALSE */
- }
-
- /* check for BLANK parameter-list */
- while ( *cp2 != '\0' && IS_BLANK(*cp2) )
- cp2++;
- if (*cp2 != ')')
- param_yes = TRUE;
-
- *(cp1+1) = '\0';
-
- return(param_yes);
- }
-
-
-
- /* ======================================================================== *
- * append_param
- * ======================================================================== */
-
- BOOLEAN append_param(proto, par_type, par_decl)
- char *proto; /* the output ANSI-prototype */
- char *par_type; /* the data-type of the parameter */
- char *par_decl; /* the parameter-declaration line */
- {
- /* Get a parameter-declaration line, construct the appropriate
- * ANSI-prototyped-declaration string, and append it to the ANSI
- * prototype.
- * *** I m p o r t a n t ***
- * 'par_decl' has been ensured (by function make_ansi()), to be
- * valid (i.e. the 1st word is a valid data-type). So the string
- * coming after the 1st word(='par_type') should be a list of
- * parameters. BUT if 'par_type' is one of the strings "unsigned",
- * short" or "long", then the 2nd word might be "int" (which should
- * not be treated as a parameter, but as part of the parameters'
- * data-type). Furthermore, 'par_type' may be "register", in which
- * case we only the 2nd word only (or the string "register" if the
- * 2nd word is not a valid data-type).
- */
-
- char *s, *param;
- char _str[MAXWORD];
- register int i;
-
- void construct_ANSI_declaration();
- BOOLEAN valid();
- STD_CLIB int *strcmp();
- STD_CLIB char *strcat(), *strtok();
-
-
-
- i = 0; /* skip the first word of 'par_decl' */
- while (par_decl[i] != '\0' && IS_BLANK(par_decl[i])) i++;
- while (par_decl[i] != '\0' && !IS_BLANK(par_decl[i])) i++;
- s = &par_decl[i];
- /* check for: register data-type,*
- * or unsigned/short/long int */
- strcpy(_str, get_1st_word(s));
- if ( valid(_str) )
- {
- if ( !strcmp(par_type,"register") ) /* handle "register" cases */
- strcpy(par_type, _str);
- else /* handle unsigned/long/short */
- {
- strcat(par_type, " "); /* cat " int" in 'par_type'*/
- strcat(par_type, _str);
- }
- while( *s != '\0' && IS_BLANK(*s) ) s++; /* skip the 2nd word: "int" */
- while( *s != '\0' && !IS_BLANK(*s) ) s++;
- }
- /* append ANSI param-declaration to 'proto' */
- while ( (param = strtok(s, ",; \t\n")) != NULL )
- {
- if ( strlen(proto) >= MAXLINE-strlen(par_type)-strlen(param)-4 )
- { /* error-check for "output line too long" */
- strcat(proto, "<...>, ");
- return(FALSE);
- }
- construct_ANSI_declaration(proto, par_type, param);
- s = (char *)NULL;
- }
-
- return(TRUE);
- }
-
-
- /* ======================================================================== *
- * construct_ANSI_declaration
- * ======================================================================== */
-
- void construct_ANSI_declaration(proto, par_type, param)
- char *proto; /* the functin prototype */
- char *par_type; /* parameter's data-type */
- char *param; /* the parameter itself */
- {
- /* Construct the ANSI parameter-declaration,
- * so it can be appended in the parameter-list
- * (if DONT_SHOW_PARAMETERS is not defined, the
- * parameter itself is also included)
- */
-
- register int i; /* just a counter */
- GLOBAL unsigned options; /* bit map with command-line options */
- STD_CLIB int strlen(); /* part of the standard C-library */
- STD_CLIB char *strcat(); /* part of the standard C-library */
-
-
- strcat(proto, par_type); /* append the parameter's data-type */
- strcat(proto, " ");
-
- if (options & SHOW_PARA) /* include parameter in parameter-list */
- strcat(proto, param);
- else /* exclude parameter from parameter-list */
- {
- for (i=0; param[i] != '\0'; i++) /* look for any pointers */
- if (param[i] == '[' || param[i] == '*')
- strcat(proto, "*");
- }
-
- i = strlen(proto)-1; /* separate parameters with ", " */
- if ( proto[i] == ' ' )
- proto[i] = '\0';
- strcat(proto, ", ");
- }
-