home *** CD-ROM | disk | FTP | other *** search
/ The C Users' Group Library 1994 August / wc-cdrom-cusersgrouplibrary-1994-08.iso / vol_300 / 345_01 / tlcgtok.c < prev    next >
C/C++ Source or Header  |  1989-07-10  |  13KB  |  572 lines

  1. /* TLCGTOK.C - "The Last Cross-referencer" - C Get Token routines        */
  2. /*    Last Modified:    02/10/89                                            */
  3.  
  4. /*
  5. ---------------------------------------------------------------------
  6. Copyright (c) 1987-1989, Eduard Schwan Programs [esp] - All rights reserved
  7. TLC (The Last C-Cross-Referencer) and TLP (same, but for Pascal) are
  8. Cross-Reference Generators crafted and shot into the Public Domain by
  9. Eduard Schwan.  The source code and executable program may be freely
  10. distributed as long as the copyright/author notices remain intact, and
  11. it is not used in part or whole as the basis of a commercial product.
  12. Any comments, bug-fixes, or enhancements are welcome.
  13. Also, if you find TLC and it's source code useful, a contribution of
  14. $20 (check/money order) is encouraged!  Hopefully we will all see more
  15. source code distributed!
  16.     Eduard Schwan, 1112 Oceanic Drive, Encinitas, Calif. 92024
  17. ---------------------------------------------------------------------
  18. */
  19.  
  20. /*
  21. HEADER:        The Last Cross-Referencer;
  22. TITLE:        TLC/TLP - The Last Cross-Referencer;
  23. VERSION:    1.01;
  24.  
  25. DESCRIPTION: "TLC/TLP.
  26.             C Get Token routines";
  27.  
  28. KEYWORDS:    Utility, Cross-reference, C, Pascal, Apple, Macintosh, APW, Aztec;
  29. SYSTEM:        Macintosh MPW, v3.0;
  30. FILENAME:    TLCGTOK.C;
  31. WARNINGS:    "Has not yet been ported to MS-DOS.
  32.             Shareware, $20 Check/Money Order suggested.";
  33.  
  34. SEE-ALSO:    README.TLC,TLCHELP.DOC,TLPHELP.DOC;
  35. AUTHORS:    Eduard Schwan;
  36. COMPILERS:    AZTEC C65 v3.2b, APPLEIIGS APW C v1.0, APPLE MACINTOSH MPW C v3.0;
  37. */
  38.  
  39.  
  40. /*------------------------ include files -------------------------*/
  41.  
  42. #include    <stdio.h>
  43. #include    <ctype.h>
  44. #include    "tlc.h"
  45.  
  46. /*------------------------- definitions -------------------------*/
  47.  
  48. #define     Eoln                0    /* character constants */
  49. #define     Tab                 '\t'
  50. #define     CReturn             '\r'
  51. #define     Newline             '\n'
  52. #define     Space                ' '
  53. #define     Minus                '-'
  54. #define     Plus                '+'
  55. #define     Equals                '='
  56. #define     Single_quote        '\''
  57. #define     Double_quote        '"'
  58. #define     Pound_sign            '#'
  59. #define     Slash                '/'
  60. #define     Backslash            '\\'
  61. #define     Asterisk            '*'
  62. #define     Period                '.'
  63. #define     Underscore            '_'
  64.  
  65.  
  66. /*--------------------- external declarations --------------------*/
  67.  
  68. #include    "tlc.ext"
  69.  
  70.  
  71. /*------------------------ static variables -----------------------*/
  72.  
  73. static char             curr_ch;
  74. static pos_int            curr_column;
  75. static int                error;
  76. static boolean            got_token;
  77. static boolean            in_comment;
  78. static boolean            pushed_token;
  79. static token_rec_type    prev_token;
  80. static int                sym_length;
  81.  
  82. static boolean            case_sensitive;
  83. static boolean            do_underscores;
  84. static boolean            del_quotes;
  85.  
  86.  
  87. /*================[ init_scanner ]==================*/
  88.  
  89. VOID init_scanner(Case_Sensitive, Do_Underscores, Del_Quotes)
  90. boolean     Case_Sensitive;
  91. boolean     Do_Underscores;
  92. boolean     Del_Quotes;
  93.  
  94.     { /* init_scanner() */
  95. /*
  96. debug(printf("init_scanner:case=%d under=%d delq=%d\n",\
  97. Case_Sensitive,Do_Underscores,Del_Quotes);)
  98. */
  99.     case_sensitive = Case_Sensitive;
  100.     do_underscores = Do_Underscores;
  101.     del_quotes       = Del_Quotes;
  102.     curr_column = 0;
  103.     error        = 0;
  104.     in_comment    = FALSE;
  105.     pushed_token= FALSE;
  106.     } /* init_scanner() */
  107.  
  108.  
  109. /*=================[ push_token ]===================*/
  110.  
  111. VOID push_token()
  112.  
  113.     { /* push_token() */
  114. /*
  115. debug(printf("push_token:\n");)
  116. */
  117.     if (pushed_token)
  118.         {
  119.         fprintf(stderr,"Error! Token stack overflow..");
  120.         exit(1);
  121.         }
  122.     else
  123.         {
  124.         prev_token = token;
  125.         pushed_token = TRUE;
  126.         }
  127.     } /* push_token() */
  128.  
  129.  
  130. /*================[ init_sym_buff ]=================*/
  131.  
  132. static VOID init_sym_buff()
  133.  
  134.     { /* init_sym_buff() */
  135. /*-------- do this if no FILLCHAR ------------
  136. pos_int k;
  137. for (k=0;k<LINE_SIZE;k++) token.tok_string[k]=0;
  138. ----------------------------------------------*/
  139.     FILLCHAR(token.tok_string, LINE_SIZE, 0);
  140.     sym_length = 0;
  141. /*debug(printf("init_sym_buff:\n");)*/
  142.     } /* init_sym_buff() */
  143.  
  144.  
  145. /*================[ add_sym_buff ]=================*/
  146.  
  147. static VOID add_sym_buff(curr_ch)
  148. char curr_ch;
  149.  
  150.     { /* add_sym_buff() */
  151. /*
  152. debug(printf("add_sym_buff:'%c'\n",curr_ch);)
  153. */
  154.     if (sym_length < LINE_SIZE)
  155.         {
  156.         token.tok_string[sym_length] = curr_ch;
  157.         sym_length++;
  158.         }
  159.     } /* add_sym_buff() */
  160.  
  161.  
  162. /*================[ del_sym_buff ]=================*/
  163.  
  164. static VOID del_sym_buff()
  165.  
  166.     { /* del_sym_buff() */
  167. /*
  168. debug(printf("del_sym_buff:\n");)
  169. */
  170.     if (sym_length > 0)
  171.         {
  172.         sym_length--;
  173.         token.tok_string[sym_length] = Eoln;
  174.         }
  175.     } /* del_sym_buff() */
  176.  
  177.  
  178. /*================[ in_symbolset ]=================*/
  179.  
  180. static boolean in_symbolset(ch)
  181. register char    ch;
  182.  
  183.     { /* in_symbolset() */
  184.     return(
  185.             isalpha(ch) || isdigit(ch) || ch==Underscore
  186.           );
  187.     } /* in_symbolset() */
  188.  
  189.  
  190. /*==============[ do_identifier ]===============*/
  191.  
  192. static VOID do_identifier()
  193.  
  194.     { /* do_identifier() */
  195.     boolean     done;
  196.  
  197. /*
  198. debug(printf("do_identifier:\n");)
  199. */
  200.     done = FALSE;
  201.     while (!error && !done)
  202.         {
  203.         curr_ch = curr_line[curr_column];
  204.         if (!case_sensitive)
  205.             curr_ch = tolower(curr_ch);
  206.         if (curr_ch == Eoln)
  207.             done = TRUE;
  208.         else
  209.         if (in_symbolset(curr_ch))
  210.             {
  211.             if (curr_ch!=Underscore || do_underscores)
  212.                 add_sym_buff(curr_ch);
  213.             curr_column++;
  214.             }
  215.         else
  216.             done = TRUE;
  217.         } /*while*/
  218.  
  219.     if (!error)
  220.         got_token = TRUE;
  221.  
  222.     } /* do_identifier() */
  223.  
  224.  
  225. /*================[ in_numberset ]=================*/
  226.  
  227. static boolean in_numberset(ch,first_time,base)
  228. register char        ch;
  229. register boolean    first_time;
  230. register pos_int    base;
  231.  
  232.     { /* in_numberset() */
  233.     return(
  234.             isdigit(ch) ||
  235.             ch=='x' || ch=='e' || ch=='l' ||
  236.             ch==Period    ||
  237.             (first_time && (ch==Minus || ch==Plus)) ||
  238.             (base==B_hex && ch>='a' && ch<='f')
  239.           );
  240.     } /* in_numberset() */
  241.  
  242.  
  243. /*================[ convert_number ]=================*/
  244.  
  245. static long convert_number(sptr,base)
  246. char*        sptr;
  247. pos_int     base;
  248.  
  249.     { /* convert_number() */
  250.     register char    ch;
  251.     register long    result;
  252.     boolean         done;
  253.  
  254. /*
  255. debug(printf("convert_number:'%s'\n",sptr);)
  256. */
  257.     result    = 0;
  258.     done    = FALSE;
  259.  
  260.     while (*sptr && !done)
  261.         {
  262.         ch = *(sptr++);
  263.         if (isdigit(ch))
  264.             { /* shift result & add next digit in */
  265.             result = result*base + (ch-'0');
  266.             }
  267.         else
  268.         if (base==B_hex && ch>='a' && ch<='f')
  269.             result = result*base + (ch-'a'+10);
  270.         else
  271.         if (ch!='x') /* only x (for hexmode) is allowable */
  272.             done = TRUE;
  273. /*
  274. debug(printf("  added %c and got %ld\n",ch,result););
  275. */
  276.         }
  277.     return(result);
  278.     } /* convert_number() */
  279.  
  280.  
  281. /*================[ do_number ]=================*/
  282.  
  283. static VOID do_number()
  284.  
  285.     { /* do_number() */
  286.     boolean     done,
  287.                 first_time;
  288.     pos_int     base;
  289.  
  290. /*
  291. debug(printf("do_number:\n");)
  292. */
  293.     done            = FALSE;
  294.     first_time        = TRUE;
  295.     if (curr_line[curr_column] == '0')
  296.         base    = B_octal;
  297.     else
  298.         base    = B_decimal;
  299.     while (!error && !done)
  300.         {
  301.         curr_ch = tolower(curr_line[curr_column]);
  302.         if (curr_ch == Eoln)
  303.             done = TRUE;
  304.         else
  305.             { /* see if it's a valid digit, if so, add it */
  306.             if (curr_ch=='x')
  307.                 base = B_hex;
  308.             if (in_numberset(curr_ch,first_time,base))
  309.                 {
  310.                 add_sym_buff(curr_line[curr_column]);
  311.                 curr_column++;
  312.                 first_time = FALSE;
  313.                 }
  314.             else
  315.                 {
  316.                 done = TRUE;
  317. /*
  318. debug(printf(" stopping because %c/%d isnt in number_set\n",curr_ch, curr_ch);)
  319. */
  320.                 }
  321.             } /* see if it's a valid digit, if so, add it */
  322.         } /*while*/
  323.  
  324.     /* all done validating & collecting number, now convert it */
  325.     if (!error)
  326.         token.tok_value = convert_number(token.tok_string,base);
  327.  
  328.     if (!error)
  329.         got_token = TRUE;
  330.  
  331.     } /* do_number() */
  332.  
  333.  
  334. /*================[ do_str_literal ]=================*/
  335.  
  336. static VOID do_str_literal()
  337.  
  338.     { /* do_str_literal() */
  339.     char        terminator;
  340.     boolean     done,skip_slash;
  341.  
  342. /*
  343. debug(printf("do_str_literal:\n");)
  344. */
  345.     terminator = curr_ch; /* single/double quote */
  346.  
  347. /* remove leading/trailing quotes */
  348.     if (del_quotes)
  349.         init_sym_buff();
  350.  
  351.     /* read until trailing quote found */
  352.     done = FALSE;
  353.     while (!error && !done)
  354.         {
  355.         curr_ch = curr_line[curr_column];
  356.         skip_slash = FALSE;
  357.         if (curr_ch == Eoln)
  358.             { /* Hit End of line without finding trailing quote - error */
  359.             error = TERR_BAD_STR;
  360.             }
  361.         else
  362.             if (curr_ch == terminator)
  363.                 {
  364.                 done = TRUE; /* found trailing quote - end */
  365.                 }
  366.             else
  367.                 if (curr_ch == Backslash)
  368.                     { /* skip next character (in case its emedded terminator)*/
  369.                     skip_slash = TRUE;
  370.                     }
  371.  
  372.             /* if valid character in string, add it to buffer */
  373.             if (!error)
  374.                 {
  375.                 if (!(done && del_quotes))
  376.                     add_sym_buff(curr_ch);
  377.                 }
  378.             curr_column++;
  379.  
  380.         /* if backslash "escape sequence", put the next character */
  381.         /* in the buffer too, but don't check it for ' or "       */
  382.         if (skip_slash)
  383.             {
  384.             skip_slash = FALSE;
  385.             curr_ch = curr_line[curr_column];
  386.             add_sym_buff(curr_ch);
  387.             curr_column++;
  388.             }
  389.         } /*while*/
  390.  
  391.     if (!error)
  392.         got_token = TRUE;
  393.  
  394.     } /* do_str_literal() */
  395.  
  396.  
  397. /*================[ do_special_syms ]=================*/
  398.  
  399. static VOID do_special_syms()
  400.  
  401.     { /* do_special_syms() */
  402.  
  403. /*
  404. debug(printf("do_special_syms:\n");)
  405. */
  406.     /*
  407.     the token type for special symbols is the ASCII code of
  408.     that symbol, unless it is a multi-char. symbol, in which
  409.     case the results of the following line are changed later
  410.     */
  411.     token.tok_type = curr_ch;
  412.  
  413.     /*
  414.     now handle each character type
  415.     */
  416.     switch (curr_ch)
  417.         {
  418.         case Slash:
  419.             if (curr_line[curr_column++]==Asterisk)
  420.                 {
  421.                 in_comment = TRUE;
  422. /*debug(puts("/*--entering comment");)*/
  423.                 }
  424.             else
  425.                 {
  426.                 curr_column--;
  427.                 got_token = TRUE;
  428.                 }
  429.             break;
  430.  
  431.         case Minus: /**** TEMPORARY SMARTS.. SOMEDAY THIS SHOULD BE DONE IN PARSER! ****/
  432.             if (isdigit(curr_line[curr_column+1]))
  433.                 {
  434.                 do_number();
  435.                 token.tok_type = TOK_NCONST;
  436.                 }
  437.             else
  438.                 got_token = TRUE;
  439.             break;
  440.         case Equals:
  441.         case Pound_sign:
  442.         default:
  443. /*------ allow any characters for now DEBUG!
  444.             error = TERR_BAD_CHR;
  445. -----------------------------------------------*/ got_token = TRUE;
  446.             break;
  447.         } /*switch*/
  448.  
  449.     } /* do_special_syms() */
  450.  
  451.  
  452. /*=================[ get_token ]===================*/
  453.  
  454. int get_token()
  455.  
  456.     { /* get_token() */
  457.  
  458.     if (pushed_token)
  459.         { /* retrieve previous token & exit */
  460.         token         = prev_token;
  461.         pushed_token = FALSE;
  462. debug(printf(" get prev. token\n");)
  463.         return(0); /* no error */
  464.         }
  465.  
  466.     got_token            = FALSE;
  467.     error                = 0;
  468.     token.tok_type        = 0;
  469.     token.tok_value     = 0;
  470.     token.tok_string[0] = 0;
  471.     do    {
  472.         /*
  473.         skip if still in comment from previous call
  474.         */
  475.         if (in_comment)
  476.             {
  477.             while (in_comment &&
  478.                    (curr_ch = curr_line[curr_column]) != Eoln)
  479.                 {
  480.                 curr_column++;
  481.                 if (curr_ch == Asterisk)
  482.                     {
  483.                     curr_ch = curr_line[curr_column];
  484.                     if (curr_ch == Slash)
  485.                         {
  486.                         in_comment = FALSE;
  487.                         curr_column++;
  488. /*debug(puts("--** exiting comment");)*/
  489.                         }
  490.                     }
  491.                 }
  492.             if (in_comment) /* then must have hit end of line */
  493.                 {
  494.                 error = TERR_EOLN;
  495.                 token.tok_column = curr_column;
  496.                 curr_column = 0;
  497.                 }
  498.             } /* if in_comment */
  499.  
  500.         /*
  501.         skip leading spaces/tabs or trailing \n or \r
  502.         */
  503.         if (!error)
  504.             {
  505.             while (curr_line[curr_column] == Space ||
  506.                    curr_line[curr_column] == Tab ||
  507.                    curr_line[curr_column] == CReturn ||
  508.                    curr_line[curr_column] == Newline)
  509.                 {
  510.                 curr_column++;
  511.                 }
  512.             if (curr_line[curr_column] == Eoln)
  513.                 { /* hit end of line */
  514.                 error = TERR_EOLN;
  515.                 token.tok_column = curr_column;
  516.                 curr_column = 0;
  517.                 }
  518.             }
  519.  
  520.         /*
  521.         now at 1st character of new token, handle it
  522.         */
  523.         if (!error)
  524.             { /* if !error */
  525.             curr_ch = curr_line[curr_column++];
  526.             token.tok_column = curr_column;
  527.             if (!case_sensitive)
  528.                 curr_ch = tolower(curr_ch);
  529.             init_sym_buff();
  530.             add_sym_buff(curr_ch);
  531.             if (!error)
  532.                 {
  533.                 if (isalpha(curr_ch) || curr_ch==Underscore)
  534.                     {
  535.                     do_identifier();
  536.                     token.tok_type = TOK_ID;
  537.                     }
  538.                 else
  539.                 if (isdigit(curr_ch))
  540.                     {
  541.                     do_number();
  542.                     token.tok_type = TOK_NCONST;
  543.                     }
  544.                 else
  545.                 if (curr_ch==Single_quote)
  546.                     {
  547.                     do_str_literal();
  548.                     token.tok_type    = TOK_CCONST;
  549.                     token.tok_value = (unsigned int) token.tok_string[0];
  550.                     }
  551.                 else
  552.                 if (curr_ch==Double_quote)
  553.                     {
  554.                     do_str_literal();
  555.                     token.tok_type = TOK_SCONST;
  556.                     }
  557.                 else
  558.                     { /* check for special symbols */
  559.                     do_special_syms();
  560.                     }
  561.                 }
  562.             } /* if !error */
  563.  
  564.         } while (!(error || got_token));
  565.  
  566. /*
  567. printf("get_token:typ=%d col=%d val=%ld str='%s' err=%d\n",
  568. token.tok_type,token.tok_column,token.tok_value,token.tok_string,error);
  569. */
  570.     return (error);
  571.     } /* get_token() */
  572.