home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Archive Magazine 1996
/
ARCHIVE_96.iso
/
discs
/
shareware
/
share_43
/
source
/
c
/
TOKENIZE
< prev
next >
Wrap
Text File
|
1991-08-22
|
18KB
|
697 lines
/*****************************************************************************
*
* tokenize.c
*
* from DKBTrace (c) 1990 David Buck
*
* This module implements the first part of a two part parser for the scene
* description files. This phase changes the input file into tokens.
*
* This software is freely distributable. The source and/or object code may be
* copied or uploaded to communications services so long as this notice remains
* at the top of each file. If any changes are made to the program, you must
clearly indicate in the documentation and in the program's startup message
* who it was who made the changes. The documentation should also describe what
* those changes were. This software may not be included in whole or in
* part into any commercial package without the express written consent of the
* author. It may, however, be included in other public domain or freely
* distributed software so long as the proper credit for the software is given.
*
* This software is provided as is without any guarantees or warranty. Although
* the author has attempted to find and correct any bugs in the software, he
* is not responsible for any damage caused by the use of the software. The
* author is under no obligation to provide service, corrections, or upgrades
* to this package.
*
* Despite all the legal stuff above, if you do find bugs, I would like to hear
* about them. Also, if you have any comments or questions, you may contact me
* at the following address:
*
* David Buck
* 22C Sonnet Cres.
* Nepean Ontario
* Canada, K2H 8W7
*
I can also be reached on the following bulletin boards:
*
* ATX (613) 526-4141
* OMX (613) 731-3419
* Mystic (613) 731-0088 or (613) 731-6698
*
* Fidonet: 1:163/109.9
* Internet: David_Buck@Carleton.CA
*
* IBM Port by Aaron A. Collins. Aaron may be reached on the following BBS'es:
*
* Lattice BBS (708) 916-1200
* The Information Exchange BBS (708) 945-5575
* Stillwaters BBS (708) 403-2826
*
*****************************************************************************/
#include <ctype.h>
#include "frame.h"
#include "dkbproto.h"
/* This module tokenizes the input file to create a token file to be read
by the parser (the second stage). Tokens written to the file contain a
token ID, the line number of the token, and if necessary, some data for
the token. */
#define MAX_STRING_INDEX 20
char String[MAX_STRING_INDEX];  /* shared buffer for the current token's text */
int String_Index;               /* next free slot in String (see Stuff_Character) */
int Line_Number = 1;            /* current input line, reported in errors and tokens */
/* Here are the reserved words, one entry per token, searched linearly by
   Find_Reserved().  If you need to add new words, be sure to declare the
   corresponding token IDs in frame.h.  Each entry is fully braced (the
   original relied on brace elision, which modern compilers flag with
   -Wmissing-braces). */
struct Reserved_Word_Struct Reserved_Words [LAST_TOKEN] = {
   { AGATE_TOKEN, "AGATE" },
   { ALPHA_TOKEN, "ALPHA" },
   { AMBIENT_TOKEN, "AMBIENT" },
   { AMPERSAND_TOKEN, "&" },
   { AT_TOKEN, "@" },
   { BACK_QUOTE_TOKEN, "`" },
   { BACK_SLASH_TOKEN, "\\" },
   { BAR_TOKEN, "|" },
   { BLUE_TOKEN, "BLUE" },
   { BRILLIANCE_TOKEN, "BRILLIANCE" },
   { BOZO_TOKEN, "BOZO" },
   { BOUNDED_TOKEN, "BOUNDED_BY" },
   { BUMPS_TOKEN, "BUMPS" },
   { CHECKER_TOKEN, "CHECKER" },
   { COLON_TOKEN, ":" },
   { COLOR_TOKEN, "COLOR" },
   { COLOUR_TOKEN, "COLOUR" },
   { COLOR_MAP_TOKEN, "COLOR_MAP" },
   { COLOUR_MAP_TOKEN, "COLOUR_MAP" },
   { COMMA_TOKEN, "," },
   { COMPOSITE_TOKEN, "COMPOSITE" },
   { DASH_TOKEN, "-" },
   { DECLARE_TOKEN, "DECLARE" },
   { DENTS_TOKEN, "DENTS" },
   { DIFFERENCE_TOKEN, "DIFFERENCE" },
   { DIFFUSE_TOKEN, "DIFFUSE" },
   { DIRECTION_TOKEN, "DIRECTION" },
   { DOLLAR_TOKEN, "$" },
   { END_BOUNDED_TOKEN, "END_BOUND" },
   { END_COLOR_MAP_TOKEN, "END_COLOR_MAP" },
   { END_COLOUR_MAP_TOKEN, "END_COLOUR_MAP" },
   { END_COMPOSITE_TOKEN, "END_COMPOSITE" },
   { END_DIFFERENCE_TOKEN, "END_DIFFERENCE" },
   { END_FOG_TOKEN, "END_FOG" },
   { END_INTERSECTION_TOKEN, "END_INTERSECTION" },
   { END_OBJECT_TOKEN, "END_OBJECT" },
   { END_OF_FILE_TOKEN, "End of File" },
   { END_PLANE_TOKEN, "END_PLANE" },
   { END_POINTS_TOKEN, "END_POINTS" },
   { END_POLYGON_TOKEN, "END_POLYGON" },
   { END_QUADRIC_TOKEN, "END_QUADRIC" },
   { END_SHAPE_TOKEN, "END_SHAPE" },
   { END_SPHERE_TOKEN, "END_SPHERE" },
   { END_TEXTURE_TOKEN, "END_TEXTURE" },
   { END_TRIANGLE_TOKEN, "END_TRIANGLE" },
   { END_UNION_TOKEN, "END_UNION" },
   { END_VIEW_POINT_TOKEN, "END_VIEW_POINT" },
   { EQUALS_TOKEN, "=" },
   { EXCLAMATION_TOKEN, "!" },
   { FLOAT_TOKEN, "FLOAT" },
   { FOG_TOKEN, "FOG" },
   { FREQUENCY_TOKEN, "FREQUENCY" },
   { GIF_TOKEN, "GIF" },
   { GRANITE_TOKEN, "GRANITE" },
   { GRADIENT_TOKEN, "GRADIENT" },
   { GREEN_TOKEN, "GREEN" },
   { HASH_TOKEN, "#" },
   { HAT_TOKEN, "^" },
   { IDENTIFIER_TOKEN, "IDENTIFIER" },
   { IFF_TOKEN, "IFF" },
   { IMAGEMAP_TOKEN, "IMAGEMAP" },
   { INCLUDE_TOKEN, "INCLUDE" },
   { INTERSECTION_TOKEN, "INTERSECTION" },
   { INVERSE_TOKEN, "INVERSE" },
   { IOR_TOKEN, "IOR" },
   { LEFT_ANGLE_TOKEN, "<" },
   { LEFT_BRACKET_TOKEN, "(" },  /* was "{"; Process_Token maps '(' to this token
                                    and '{' starts a comment (cf. RIGHT_BRACKET ")") */
   { LEFT_SQUARE_TOKEN, "[" },
   { LIGHT_SOURCE_TOKEN, "LIGHT_SOURCE" },
   { LOCATION_TOKEN, "LOCATION" },
   { LOOK_AT_TOKEN, "LOOK_AT" },
   { MARBLE_TOKEN, "MARBLE" },
   { OBJECT_TOKEN, "OBJECT" },
   { ONCE_TOKEN, "ONCE" },
   { PERCENT_TOKEN, "%" },
   { PHASE_TOKEN, "PHASE" },
   { PHONG_TOKEN, "PHONG" },
   { PHONGSIZE_TOKEN, "PHONGSIZE" },
   { PLANE_TOKEN, "PLANE" },
   { PLUS_TOKEN, "+" },
   { POINTS_TOKEN, "POINTS" },
   { POLYGON_TOKEN, "POLYGON" },
   { QUADRIC_TOKEN, "QUADRIC" },
   { QUESTION_TOKEN, "?" },
   { RAW_TOKEN, "RAW" },
   { RED_TOKEN, "RED" },
   { REFLECTION_TOKEN, "REFLECTION" },
   { REFRACTION_TOKEN, "REFRACTION" },
   { REVOLVE_TOKEN, "REVOLVE" },
   { RIGHT_TOKEN, "RIGHT" },
   { RIGHT_ANGLE_TOKEN, ">" },
   { RIGHT_BRACKET_TOKEN, ")" },
   { RIGHT_SQUARE_TOKEN, "]" },
   { RIPPLES_TOKEN, "RIPPLES" },
   { ROTATE_TOKEN, "ROTATE" },
   { ROUGHNESS_TOKEN, "ROUGHNESS" },
   { SCALE_TOKEN, "SCALE" },
   { SEMI_COLON_TOKEN, ";" },
   { SHAPE_TOKEN, "SHAPE" },
   { SKY_TOKEN, "SKY" },
   { SINGLE_QUOTE_TOKEN, "'" },
   { SIZE_TOKEN, "SIZE" },
   { SLASH_TOKEN, "/" },
   { SMOOTH_TRIANGLE_TOKEN, "SMOOTH_TRIANGLE" },
   { SPECULAR_TOKEN, "SPECULAR" },
   { SPHERE_TOKEN, "SPHERE" },
   { SPOTTED_TOKEN, "SPOTTED" },
   { STAR_TOKEN, "*" },
   { STRING_TOKEN, "STRING" },
   { TEXTURE_TOKEN, "TEXTURE" },
   { TILDE_TOKEN, "~" },
   { TRANSLATE_TOKEN, "TRANSLATE" },
   { TRIANGLE_TOKEN, "TRIANGLE" },
   { TURBULENCE_TOKEN, "TURBULENCE" },
   { UNION_TOKEN, "UNION" },
   { UP_TOKEN, "UP" },
   { VIEW_POINT_TOKEN, "VIEW_POINT" },
   { WAVES_TOKEN, "WAVES" },
   { WOOD_TOKEN, "WOOD" },
   { WRINKLES_TOKEN, "WRINKLES" }
};
/* Make a table for user-defined symbols. 200 symbols should be more
than enough. */
#define MAX_SYMBOLS 200
/* Hard code symbols to be a maximum of 40 characters long (39 chars plus
   the terminating '\0'; see the strncpy in Read_Symbol).  Slot 0 is never
   used: symbol IDs are assigned starting at 1. */
char Symbol_Table[MAX_SYMBOLS][40];
int Number_Of_Symbols;              /* count of symbols assigned so far */
char File_Name[FILE_NAME_LENGTH];   /* name of the file currently being tokenized */
extern FILE *Data_File, *Token_File, *Symbol_File;  /* defined elsewhere in the project */
/* Propagate failure: returns FALSE from the *enclosing* function when x fails.
   Note the braces-without-semicolon form, which lets CALL(...) precede else. */
#define CALL(x) { if (!(x)) return (FALSE); }
/* The main tokenizing routine. Set up the files and continue parsing
until the end of file */
void Tokenize (name, in, symbol, out)
char *name;
FILE *in, *symbol, *out;
{
/* Keep track of the file name so we don't get confused when we
return from INCLUDE files */
strcpy (File_Name, name);
Data_File = in;
Token_File = out;
Symbol_File = symbol;
Number_Of_Symbols = 0;
/* Let the parser know the name of the file we are tokenizing */
strcpy (String, name);
Write_Token (INCLUDE_TOKEN);
while (Process_Token()) ;
}
/* Read one token from the input file and emit it.  This is the dispatch
   hub: whitespace is skipped first, then the first character of the token
   decides what to do -- newline bookkeeping, comment skipping, quoted
   strings, numbers, identifiers/reserved words, or single-character
   punctuation (looked up in a table).  Returns FALSE at end of input. */
int Process_Token ()
{
   /* Single-character punctuation and the token each one produces. */
   static struct { int Character; int Token_Id; } Punctuation[] = {
      { '@',  AT_TOKEN },           { '&',  AMPERSAND_TOKEN },
      { '`',  BACK_QUOTE_TOKEN },   { '\\', BACK_SLASH_TOKEN },
      { '|',  BAR_TOKEN },          { ':',  COLON_TOKEN },
      { ',',  COMMA_TOKEN },        { '-',  DASH_TOKEN },
      { '$',  DOLLAR_TOKEN },       { '=',  EQUALS_TOKEN },
      { '!',  EXCLAMATION_TOKEN },  { '#',  HASH_TOKEN },
      { '^',  HAT_TOKEN },          { '<',  LEFT_ANGLE_TOKEN },
      { '(',  LEFT_BRACKET_TOKEN }, { '[',  LEFT_SQUARE_TOKEN },
      { '%',  PERCENT_TOKEN },      { '+',  PLUS_TOKEN },
      { '?',  QUESTION_TOKEN },     { '>',  RIGHT_ANGLE_TOKEN },
      { ')',  RIGHT_BRACKET_TOKEN },{ ']',  RIGHT_SQUARE_TOKEN },
      { ';',  SEMI_COLON_TOKEN },   { '\'', SINGLE_QUOTE_TOKEN },
      { '/',  SLASH_TOKEN },        { '*',  STAR_TOKEN },
      { '~',  TILDE_TOKEN }
   };
   register int c, i;
   Skip_Spaces ();
   c = getc(Data_File);
   if (c == EOF)
      return (FALSE);
   String[0] = '\0';
   if (c == '\n')
   {
      Line_Number++;
      return (TRUE);
   }
   if (c == '{')               /* open brace starts a (nestable) comment */
   {
      Parse_Comments();
      return (TRUE);
   }
   if (c == '"')               /* double quote starts a string literal */
   {
      Parse_String ();
      return (TRUE);
   }
   if (isdigit(c))             /* digits 0-9 start a number */
   {
      ungetc (c, Data_File);
      CALL (Read_Float ());
      return (TRUE);
   }
   if (isalpha(c) || c == '_') /* letters or '_' start an identifier */
   {
      ungetc (c, Data_File);
      CALL (Read_Symbol ());
      return (TRUE);
   }
   for (i = 0; i < (int) (sizeof Punctuation / sizeof Punctuation[0]); i++)
      if (Punctuation[i].Character == c)
      {
         Write_Token (Punctuation[i].Token_Id);
         return (TRUE);
      }
   printf ("Error on line %d\n", Line_Number);
   printf ("Illegal character in input file, value is %02x\n", c);
   return (TRUE);
}
/* Consume whitespace from the input file, counting newlines as they go
   by.  The first non-space character is pushed back for the caller.
   Returns FALSE only if end of file is hit while skipping. */
int Skip_Spaces ()
{
   register int c;
   for (;;)
   {
      if ((c = getc (Data_File)) == EOF)
         return (FALSE);
      if (!isspace (c))
         break;
      if (c == '\n')
         Line_Number++;
   }
   ungetc (c, Data_File);
   return (TRUE);
}
/* Comments start with an open brace ({) and end with a close brace (}).
The open brace has been read already. Continue reading until a close
brace is encountered. Be sure to count the lines while you're at it.
Incidently, nested comments are supported (in case you do such esoteric
things) */
int Parse_Comments ()
{
register int c;
int End_Of_Comment;
End_Of_Comment = FALSE;
while (!End_Of_Comment)
{
c = getc (Data_File);
if (c == EOF)
{
Token_Error ("No closing comment found");
return (FALSE);
}
if (c == (int) '\n')
Line_Number++;
if (c == (int) '{')
CALL (Parse_Comments())
else
End_Of_Comment = (c == (int) '}');
}
return (TRUE);
}
/* String-buffer helpers: call Begin_String to start a fresh string,
   Stuff_Character to append characters with range checking, and
   End_String to terminate it.  The result is left in the global String. */

/* Reset the shared String buffer so the next Stuff_Character writes
   at position zero. */
void Begin_String()
{
   String_Index = 0;
}
/* Append one character to the shared String buffer, guarding against
   overflow.  Fix: the original reported "String too long" whenever the
   buffer became full, even when the character filling the last slot was
   the terminating '\0' from End_String -- so a string of exactly
   MAX_STRING_INDEX-1 characters (which fits, terminator included) raised
   a spurious error.  Now only a real data character overflowing the
   buffer is an error; in that case the string is truncated and kept
   NUL-terminated.  Characters arriving after the buffer is full are
   silently dropped, as before. */
void Stuff_Character (c)
int c;
{
   if (String_Index < MAX_STRING_INDEX)
   {
      String [String_Index++] = (char) c;
      if (String_Index >= MAX_STRING_INDEX && c != (int) '\0')
      {
         Token_Error ("String too long");
         String [String_Index-1] = '\0';  /* truncate but keep terminated */
      }
   }
}
/* Terminate the string being built in the shared String buffer. */
void End_String ()
{
   Stuff_Character (0);   /* append the NUL terminator */
}
/* Read a float from the input file and tokenize it as one token. The phase
variable is 0 for the first character, 1 for all subsequent characters
up to the decimal point, and 2 for all characters after the decimal
point. This helps to ensure that the number is formatted properly. */
/* Read an unsigned decimal number ("digits" or "digits.digits") from the
   input file and emit it as a single FLOAT_TOKEN.  A small state machine
   enforces the format: state 0 checks the first character (which must be
   a digit), state 1 accepts the integer part and an optional decimal
   point, state 2 accepts the fraction.  The first character that cannot
   extend the number is pushed back.  Returns FALSE on end of file. */
int Read_Float()
{
   register int c;
   register int State;
   State = 0;
   Begin_String();
   for (;;)
   {
      c = getc(Data_File);
      if (c == EOF)
      {
         Token_Error ("Unexpected end of file");
         return (FALSE);
      }
      if (State == 0)
      {
         /* First character: must be a digit, but keep going either way. */
         if (isdigit(c))
            Stuff_Character(c);
         else
            Token_Error ("Error in decimal number");
         State = 1;
      }
      else if (isdigit(c))
         Stuff_Character(c);
      else if (State == 1 && c == (int) '.')
      {
         Stuff_Character(c);
         State = 2;           /* only one decimal point allowed */
      }
      else
         break;               /* number is complete */
   }
   ungetc (c, Data_File);
   End_String();
   Write_Token (FLOAT_TOKEN);
   return (TRUE);
}
/* Read a quoted string (the opening quote has already been consumed)
   into the shared String buffer and emit it as a STRING_TOKEN.  Returns
   FALSE if the file ends before the closing quote. */
int Parse_String ()
{
   register int c;
   Begin_String();
   while ((c = getc(Data_File)) != (int) '"')
   {
      if (c == EOF)
      {
         Token_Error ("No end quote for string");
         return (FALSE);
      }
      Stuff_Character (c);
   }
   End_String();
   Write_Token (STRING_TOKEN);
   return (TRUE);
}
/* Read an include file. This can be a bit tricky. The old files are saved
   in local variables while the include file is being read. We have to
   write out an INCLUDE token when we start to tell the parser the name of
   the file we're in. We have to write another INCLUDE token when we finish
   to tell the parser that we're back.  Returns FALSE if the quoted file
   name is unterminated; exits the program if the quote is missing or the
   file cannot be opened. */
int Read_Include ()
{
    register int c;
    FILE *new_file, *original_file;
    register int Old_Line_Number;
    char Old_File_Name[FILE_NAME_LENGTH];
    Skip_Spaces();
    /* The file name must follow as a quoted string.
       NOTE(review): this error path exits with status 0 (success) and
       skips close_all(), unlike the fopen failure below -- verify intent. */
    if (getc(Data_File) != (int) '\"') {
    printf ("Start quote expected\n");
    exit (0);
    }
    /* Collect the file name, up to the closing quote, into String. */
    Begin_String();
    while (TRUE)
    {
    c = getc(Data_File);
    if (c == EOF)
    {
    Token_Error ("No end quote for string");
    return (FALSE);
    }
    if (c != (int) '"')
    Stuff_Character (c);
    else
    break;
    }
    End_String();
    if ((new_file = fopen (String, "r")) == NULL) {
    printf ("Cannot open include file %s\n", String);
    close_all();
    exit(1);
    }
    /* Announce the switch to the include file, then tokenize it
       recursively.  File pointer, line number, and file name of the
       current file are saved locally and restored afterwards.
       NOTE(review): Tokenize() resets Number_Of_Symbols to 0 and it is
       not restored here, so symbol IDs assigned before the include may
       be reused afterwards -- confirm against the parser's handling. */
    Write_Token (INCLUDE_TOKEN);
    Old_Line_Number = Line_Number;
    Line_Number = 1;
    original_file = Data_File;
    strcpy (Old_File_Name, File_Name);
    Tokenize (String, new_file, Symbol_File, Token_File);
    fclose (new_file);
    Data_File = original_file;
    Line_Number = Old_Line_Number;
    strcpy (File_Name, Old_File_Name);
    /* Second INCLUDE token tells the parser we are back in the old file. */
    strcpy (String, Old_File_Name);
    Write_Token (INCLUDE_TOKEN);
    return (TRUE);
}
/* Read an identifier (letters, digits, '_') from the input file.  If it
   is a reserved word, emit that word's token -- except INCLUDE, which is
   handled specially by Read_Include().  Otherwise look it up in (or add
   it to) the user symbol table, append new names to the Symbol file, and
   emit an identifier token: LAST_TOKEN plus the symbol's ID.  Returns
   FALSE on unexpected end of file; exits if the table fills up. */
int Read_Symbol ()
{
   register int c, Symbol_Id;
   Begin_String();
   for (;;)
   {
      c = getc(Data_File);
      if (c == EOF)
      {
         Token_Error ("Unexpected end of file");
         return (FALSE);
      }
      if (!isalpha(c) && !isdigit(c) && c != (int) '_')
      {
         ungetc (c, Data_File);  /* first non-symbol char belongs to the next token */
         break;
      }
      Stuff_Character (c);
   }
   End_String();
   Symbol_Id = Find_Reserved();
   if (Symbol_Id != -1)
   {
      /* INCLUDE is a reserved word, but we want to handle it separately */
      if (Symbol_Id == INCLUDE_TOKEN)
         Read_Include();
      else
         Write_Token (Symbol_Id);
   }
   else
   {
      Symbol_Id = Find_Symbol();
      if (Symbol_Id == -1)
      {
         /* New symbol: register it.  IDs start at 1, so capacity is
            MAX_SYMBOLS - 1 entries. */
         if (++Number_Of_Symbols >= MAX_SYMBOLS)
         {
            printf ("\nToo many symbols\n");
            exit(0);
         }
         strncpy (&Symbol_Table[Number_Of_Symbols][0], &String[0], 39);
         Symbol_Table[Number_Of_Symbols][39] = '\0';
         fprintf (Symbol_File, "%s\n", &String[0]);
         Symbol_Id = Number_Of_Symbols;
      }
      Write_Token (LAST_TOKEN + Symbol_Id);
   }
   return (TRUE);
}
/* Look the current String up in the reserved-word table.  Returns the
   matching entry's token number, or -1 if the word is not reserved. */
int Find_Reserved ()
{
   register int Entry;
   for (Entry = 0; Entry < LAST_TOKEN; Entry++)
   {
      if (strcmp (Reserved_Words[Entry].Token_Name, String) == 0)
         return (Reserved_Words[Entry].Token_Number);
   }
   return (-1);
}
/* Look the current String up among the user-defined symbols seen so far
   (IDs 1..Number_Of_Symbols).  Returns the symbol's ID, or -1 if it has
   not been seen before. */
int Find_Symbol ()
{
   register int Entry;
   for (Entry = 1; Entry <= Number_Of_Symbols; Entry++)
   {
      if (strcmp (Symbol_Table[Entry], String) == 0)
         return (Entry);
   }
   return (-1);
}
/* Append one token to the token file as a text line: the token ID, the
   line number it came from, and whatever text is in the String buffer
   (empty for simple punctuation tokens). */
void Write_Token (Token_Id)
TOKEN Token_Id;
{
   fprintf (Token_File, "%d %d %s\n", Token_Id, Line_Number, String);
}
/* Print an error message to stdout, prefixed with the current input line
   number and followed by a blank-line separator.  (Output is identical to
   the original puts-based version: str plus three newlines.) */
void Token_Error (str)
char *str;
{
   printf ("Error on line %d\n", Line_Number);
   printf ("%s\n\n\n", str);
}