home *** CD-ROM | disk | FTP | other *** search
/ The C Users' Group Library 1994 August / wc-cdrom-cusersgrouplibrary-1994-08.iso / vol_300 / 345_01 / tlpgtok.c < prev    next >
C/C++ Source or Header  |  1989-07-10  |  14KB  |  613 lines

  1. /* TLPGTOK.C - "The Last Cross-referencer" - Pascal Get Token routines    */
  2. /*    Last Modified:    02/10/89                                            */
  3.  
  4. /*
  5. ---------------------------------------------------------------------
  6. Copyright (c) 1987-1989, Eduard Schwan Programs [esp] - All rights reserved
  7. TLC (The Last C-Cross-Referencer) and TLP (same, but for Pascal) are
  8. Cross-Reference Generators crafted and shot into the Public Domain by
  9. Eduard Schwan.  The source code and executable program may be freely
  10. distributed as long as the copyright/author notices remain intact, and
  11. it is not used in part or whole as the basis of a commercial product.
  12. Any comments, bug-fixes, or enhancements are welcome.
  13. Also, if you find TLC and it's source code useful, a contribution of
  14. $20 (check/money order) is encouraged!  Hopefully we will all see more
  15. source code distributed!
  16.     Eduard Schwan, 1112 Oceanic Drive, Encinitas, Calif. 92024
  17. ---------------------------------------------------------------------
  18. */
  19.  
  20. /*
  21. HEADER:        The Last Cross-Referencer;
  22. TITLE:        TLC/TLP - The Last Cross-Referencer;
  23. VERSION:    1.01;
  24.  
  25. DESCRIPTION: "TLC/TLP.
  26.             Pascal Get Token routines";
  27.  
  28. KEYWORDS:    Utility, Cross-reference, C, Pascal, Apple, Macintosh, APW, Aztec;
  29. SYSTEM:        Macintosh MPW, v3.0;
  30. FILENAME:    TLPGTOK.C;
  31. WARNINGS:    "Has not yet been ported to MS-DOS.
  32.             Shareware, $20 Check/Money Order suggested.";
  33.  
  34. SEE-ALSO:    README.TLC,TLCHELP.DOC,TLPHELP.DOC;
  35. AUTHORS:    Eduard Schwan;
  36. COMPILERS:    AZTEC C65 v3.2b, APPLEIIGS APW C v1.0, APPLE MACINTOSH MPW C v3.0;
  37. */
  38.  
  39.  
  40. /*------------------------ include files -------------------------*/
  41.  
  42. #include    <stdio.h>
  43. #include    <ctype.h>
  44. #include    "tlc.h"
  45.  
  46. /*------------------------- definitions -------------------------*/
  47.  
  48. #define     Eoln                0    /* character constants */
  49. #define     Tab                 '\t'
  50. #define     CReturn             '\r'
  51. #define     Newline             '\n'
  52. #define     Space                ' '
  53. #define     Minus                '-'
  54. #define     Plus                '+'
  55. #define     Equals                '='
  56. #define     Single_quote        '\''
  57. #define     Double_quote        '"'
  58. #define     Dollar_sign            '$'
  59. #define     Pound_sign            '#'
  60. #define     Slash                '/'
  61. #define     Backslash            '\\'
  62. #define     Open_paren            '('
  63. #define     Close_paren            ')'
  64. #define     Open_brace            '{'
  65. #define     Close_brace            '}'
  66. #define     Asterisk            '*'
  67. #define     Period                '.'
  68. #define     Underscore            '_'
  69.  
  70.  
  71. /*--------------------- external declarations --------------------*/
  72.  
  73. #include    "tlc.ext"
  74.  
  75.  
  76. /*------------------------ static variables -----------------------*/
  77.  
  78. static char             curr_ch;
  79. static pos_int            curr_column;
  80. static int                error;
  81. static boolean            got_token;
  82. static boolean            in_comment;
  83. static boolean            pushed_token;
  84. static token_rec_type    prev_token;
  85. static int                sym_length;
  86.  
  87. static boolean            case_sensitive;
  88. static boolean            do_underscores;
  89. static boolean            del_quotes;
  90.  
  91.  
  92. /*================[ init_scanner ]==================*/
  93.  
  94. VOID init_scanner(Case_Sensitive, Do_Underscores, Del_Quotes)
  95. boolean     Case_Sensitive;
  96. boolean     Do_Underscores;
  97. boolean     Del_Quotes;
  98.  
  99.     { /* init_scanner() */
  100. /*
  101. debug(printf("init_scanner:case=%d under=%d delq=%d\n",\
  102. Case_Sensitive,Do_Underscores,Del_Quotes);)
  103. */
  104.     case_sensitive = Case_Sensitive;
  105.     do_underscores = Do_Underscores;
  106.     del_quotes       = Del_Quotes;
  107.     curr_column = 0;
  108.     error        = 0;
  109.     in_comment    = FALSE;
  110.     pushed_token= FALSE;
  111.     } /* init_scanner() */
  112.  
  113.  
  114. /*=================[ push_token ]===================*/
  115.  
  116. VOID push_token()
  117.  
  118.     { /* push_token() */
  119. /*
  120. debug(printf("push_token:\n");)
  121. */
  122.     if (pushed_token)
  123.         {
  124.         fprintf(stderr,"Error! Token stack overflow..");
  125.         exit(1);
  126.         }
  127.     else
  128.         {
  129.         prev_token = token;
  130.         pushed_token = TRUE;
  131.         }
  132.     } /* push_token() */
  133.  
  134.  
  135. /*================[ init_sym_buff ]=================*/
  136.  
  137. static VOID init_sym_buff()
  138.  
  139.     { /* init_sym_buff() */
  140. /*-------- do this if no FILLCHAR ------------
  141. pos_int k;
  142. for (k=0;k<LINE_SIZE;k++) token.tok_string[k]=0;
  143. ----------------------------------------------*/
  144.     FILLCHAR(token.tok_string, LINE_SIZE, 0);
  145.     sym_length = 0;
  146. /*debug(printf("init_sym_buff:\n");)*/
  147.     } /* init_sym_buff() */
  148.  
  149.  
  150. /*================[ add_sym_buff ]=================*/
  151.  
  152. static VOID add_sym_buff(curr_ch)
  153. char curr_ch;
  154.  
  155.     { /* add_sym_buff() */
  156. /*
  157. debug(printf("add_sym_buff:'%c'\n",curr_ch);)
  158. */
  159.     if (sym_length < LINE_SIZE)
  160.         {
  161.         token.tok_string[sym_length] = curr_ch;
  162.         sym_length++;
  163.         }
  164.     } /* add_sym_buff() */
  165.  
  166.  
  167. /*================[ del_sym_buff ]=================*/
  168.  
  169. static VOID del_sym_buff()
  170.  
  171.     { /* del_sym_buff() */
  172. /*
  173. debug(printf("del_sym_buff:\n");)
  174. */
  175.     if (sym_length > 0)
  176.         {
  177.         sym_length--;
  178.         token.tok_string[sym_length] = Eoln;
  179.         }
  180.     } /* del_sym_buff() */
  181.  
  182.  
  183. /*================[ in_symbolset ]=================*/
  184.  
  185. static boolean in_symbolset(ch)
  186. register char    ch;
  187.  
  188.     { /* in_symbolset() */
  189.     return(
  190.             isalpha(ch) || isdigit(ch) || ch==Underscore
  191.           );
  192.     } /* in_symbolset() */
  193.  
  194.  
  195. /*==============[ do_identifier ]===============*/
  196.  
  197. static VOID do_identifier()
  198.  
  199.     { /* do_identifier() */
  200.     boolean     done;
  201.  
  202. /*
  203. debug(printf("do_identifier:\n");)
  204. */
  205.     done = FALSE;
  206.     while (!error && !done)
  207.         {
  208.         curr_ch = curr_line[curr_column];
  209.         if (!case_sensitive)
  210.             curr_ch = tolower(curr_ch);
  211.         if (curr_ch == Eoln)
  212.             done = TRUE;
  213.         else
  214.         if (in_symbolset(curr_ch))
  215.             {
  216.             if (curr_ch!=Underscore || do_underscores)
  217.                 add_sym_buff(curr_ch);
  218.             curr_column++;
  219.             }
  220.         else
  221.             done = TRUE;
  222.         } /*while*/
  223.  
  224.     if (!error)
  225.         got_token = TRUE;
  226.  
  227.     } /* do_identifier() */
  228.  
  229.  
  230. /*================[ in_numberset ]=================*/
  231.  
  232. static boolean in_numberset(ch, nextch,first_time,base)
  233. register char        ch;
  234. register char        nextch;
  235. register boolean    first_time;
  236. register pos_int    base;
  237.  
  238.     { /* in_numberset() */
  239.     /* this doesn't quite handle reals correctly.. 2nd signs goof it up */
  240.     /* in exponents, i.e.  -256E-5                                        */
  241.     return(
  242.             isdigit(ch) ||
  243.             ch=='e' || (ch==Period && nextch != Period) ||
  244.             (base==B_hex && ch>='a' && ch <= 'f') ||
  245.             (first_time && (ch==Minus || ch==Plus))
  246.           );
  247.     } /* in_numberset() */
  248.  
  249.  
  250. /*================[ convert_number ]=================*/
  251.  
  252. static long convert_number(sptr,base)
  253. char*        sptr;
  254. pos_int     base;
  255.  
  256.     { /* convert_number() */
  257.     register char    ch;
  258.     register long    result;
  259.     boolean         done;
  260.  
  261.  
  262. /*debug(printf("convert_number:'%s'\n",sptr);)*/
  263.  
  264.     result    = 0;
  265.     done    = FALSE;
  266.  
  267.     while (*sptr && !done)
  268.         {
  269.         ch = *(sptr++);
  270.         if (isdigit(ch))
  271.             { /* shift result & add next digit in */
  272.             result = result*base + (ch-'0');
  273.             }
  274.         else
  275.         if (base==B_hex && ch>='a' && ch<='f')
  276.             result = result*base + (ch-'a'+10);
  277.         else
  278.             done = TRUE;
  279.  
  280. /*debug(printf("  added %c and got %ld\n",ch,result););*/
  281.  
  282.         }
  283.     return(result);
  284.     } /* convert_number() */
  285.  
  286.  
  287. /*================[ do_number ]=================*/
  288.  
  289. static VOID do_number()
  290.  
  291.     { /* do_number() */
  292.     boolean     done,
  293.                 first_time;
  294.     pos_int     base;
  295.  
  296.  
  297. /*debug(printf("do_number:\n");)*/
  298.  
  299.     done            = FALSE;
  300.     first_time        = TRUE;
  301.     if (curr_ch == Dollar_sign)
  302.         {
  303.         base    = B_hex;
  304.         }
  305.     else
  306.         base    = B_decimal;
  307.     while (!error && !done)
  308.         {
  309.         curr_ch = tolower(curr_line[curr_column]);
  310.         if (curr_ch == Eoln)
  311.             done = TRUE;
  312.         else
  313.             { /* see if it's a valid digit, if so, add it */
  314.             if (in_numberset(curr_ch,curr_line[curr_column],first_time,base))
  315.                 {
  316.                 add_sym_buff(curr_line[curr_column]);
  317.                 curr_column++;
  318.                 first_time = FALSE;
  319.                 }
  320.             else
  321.                 {
  322.                 done = TRUE;
  323. /*
  324. debug(printf(" stopping because %c/%d isnt in number_set\n",curr_ch, curr_ch);)
  325. */
  326.                 }
  327.             } /* see if it's a valid digit, if so, add it */
  328.         } /*while*/
  329.  
  330.     /* all done validating & collecting number, now convert it */
  331.     if (!error)
  332.         token.tok_value = convert_number(token.tok_string,base);        
  333.  
  334.     if (!error)
  335.         got_token = TRUE;
  336.  
  337.     } /* do_number() */
  338.  
  339.  
  340. /*================[ do_str_literal ]=================*/
  341.  
  342. static VOID do_str_literal()
  343.  
  344.     { /* do_str_literal() */
  345.     char        terminator;
  346.     boolean     done;
  347.  
  348. /*
  349. debug(printf("do_str_literal:\n");)
  350. */
  351.     terminator = curr_ch; /* single/double quote */
  352.  
  353.     /* remove leading/trailing quotes */
  354.     if (del_quotes)
  355.         init_sym_buff();
  356.  
  357.     /* read until trailing quote found */
  358.     done = FALSE;
  359.     while (!error && !done)
  360.         {
  361.         curr_ch = curr_line[curr_column];
  362.         if (curr_ch == Eoln)
  363.             { /* Hit End of line without finding trailing quote - error */
  364.             error = TERR_BAD_STR;
  365.             }
  366.         else
  367.             {
  368.             if (curr_ch == terminator)
  369.                 { /* is it doubled? */
  370.                 if (curr_line[curr_column+1] == terminator)
  371.                     { /* skip past doubled quote, it'll be added below */
  372.                     curr_column++;
  373.                     }
  374.                 else
  375.                     done = TRUE; /* found trailing quote - end */
  376.                 }
  377.             }
  378.  
  379.             /* if valid character in string, add it to buffer */
  380.             if (!error)
  381.                 {
  382.                 if (!(done && del_quotes))
  383.                     add_sym_buff(curr_ch);
  384.                 }
  385.             curr_column++;
  386.         } /*while*/
  387.  
  388.     if (!error)
  389.         got_token = TRUE;
  390.  
  391.     } /* do_str_literal() */
  392.  
  393.  
  394. /*================[ do_special_syms ]=================*/
  395.  
  396. static VOID do_special_syms()
  397.  
  398.     { /* do_special_syms() */
  399.  
  400. /*
  401. debug(printf("do_special_syms:\n");)
  402. */
  403.     /*
  404.     the token type for special symbols is the ASCII code of
  405.     that symbol, unless it is a multi-char. symbol, in which
  406.     case the results of the following line are changed later
  407.     */
  408.     token.tok_type = curr_ch;
  409.  
  410.     /*
  411.     now handle each character type
  412.     */
  413.     switch (curr_ch)
  414.         {
  415.         case Open_paren:
  416.             if (curr_line[curr_column++]==Asterisk)
  417.                 {
  418.                 in_comment = curr_ch;    /* just to keep track of which comment type.. */
  419. /*debug(puts("(*-- entering comment");)*/
  420.                 }
  421.             else
  422.                 {
  423.                 curr_column--;
  424.                 got_token = TRUE;
  425.                 }
  426.             break;
  427.  
  428.         case Open_brace:
  429.             in_comment = curr_ch;    /* just to keep track of which comment type.. */
  430. /*debug(puts("{-- entering comment");)*/
  431.             break;
  432.             
  433.         case Dollar_sign:
  434. debug(printf(" got $, check next digit='%c'\n",curr_line[curr_column]);)
  435.             if (in_numberset(tolower(curr_line[curr_column]),
  436.                             curr_line[curr_column+1],
  437.                             TRUE, B_hex))
  438.                 {
  439.                 do_number();
  440.                 token.tok_type = TOK_NCONST;
  441.                 }
  442.             else
  443.                 got_token = TRUE;
  444.             break;
  445.             
  446.         case Minus: /* TEMPORARY SMARTS.. SOMEDAY THIS SHOULD BE DONE IN PARSER! */
  447. /*
  448. debug(printf(" got MINUS: '%c,%c,%c'\n",\
  449.     curr_line[curr_column],curr_line[curr_column+1],curr_line[curr_column+2]);)
  450. */
  451.             if ((curr_line[curr_column] != Eoln) &&
  452.                 isdigit(curr_line[curr_column]))
  453.                 {
  454.                 do_number();
  455.                 token.tok_type = TOK_NCONST;
  456.                 }
  457.             else
  458.                 got_token = TRUE;
  459.             break;
  460.             
  461.         case Equals:
  462.         case Pound_sign:
  463.         default:
  464. /*------ allow any characters for now, since there aint no parser yet..
  465.             error = TERR_BAD_CHR;
  466. -----------------------------------------------*/ got_token = TRUE;
  467.             break;
  468.         } /*switch*/
  469.  
  470.     } /* do_special_syms() */
  471.  
  472.  
  473. /*=================[ get_token ]===================*/
  474.  
  475. int get_token()
  476.  
  477.     { /* get_token() */
  478.  
  479.     if (pushed_token)
  480.         { /* retrieve previous token & exit */
  481.         token         = prev_token;
  482.         pushed_token = FALSE;
  483. debug(printf(" get prev. token\n");)
  484.         return(0); /* no error */
  485.         }
  486.  
  487.     got_token            = FALSE;
  488.     error                = 0;
  489.     token.tok_type        = 0;
  490.     token.tok_value     = 0;
  491.     token.tok_string[0] = 0;
  492.     do    {
  493.         /*
  494.         skip if still in comment from previous call
  495.         */
  496.         if (in_comment != 0)
  497.             {
  498.             while (in_comment &&
  499.                    (curr_ch = curr_line[curr_column]) != Eoln)
  500.                 {
  501. /*
  502. debug(printf("--looking for end comment'%c', at [%d]='%c'\n",\
  503.         in_comment, curr_column, curr_ch);)
  504. */
  505.                 curr_column++;
  506.                 switch (in_comment)
  507.                     {
  508.                     case Open_paren:
  509.                         if (curr_ch == Asterisk)
  510.                             {
  511.                             curr_ch = curr_line[curr_column];
  512.                             if (curr_ch == Close_paren)
  513.                                 {
  514.                                 in_comment = FALSE;
  515.                                 curr_column++;
  516. /*debug(puts("--*) exiting comment");)*/
  517.                                 }
  518.                             }
  519.                         break;
  520.                         
  521.                     case Open_brace:
  522.                         if (curr_ch == Close_brace)
  523.                             {
  524. /*debug(puts("--} exiting comment");)*/
  525.                             in_comment = FALSE;
  526.                             }
  527.                         break;
  528.                         
  529.                     default:
  530.                         break;
  531.                     } /* switch */
  532.                 }
  533.             if (in_comment) /* then must have hit end of line */
  534.                 {
  535.                 error = TERR_EOLN;
  536.                 token.tok_column = curr_column;
  537.                 curr_column = 0;
  538.                 }
  539.             } /* if in_comment */
  540.  
  541.         /*
  542.         skip leading spaces/tabs or trailing \n or \r
  543.         */
  544.         if (!error)
  545.             {
  546.             while (curr_line[curr_column] == Space ||
  547.                    curr_line[curr_column] == Tab ||
  548.                    curr_line[curr_column] == CReturn ||
  549.                    curr_line[curr_column] == Newline)
  550.                 {
  551.                 curr_column++;
  552.                 }
  553.             if (curr_line[curr_column] == Eoln)
  554.                 { /* hit end of line */
  555.                 error = TERR_EOLN;
  556.                 token.tok_column = curr_column;
  557.                 curr_column = 0;
  558.                 }
  559.             }
  560.  
  561.         /*
  562.         now at 1st character of new token, handle it
  563.         */
  564.         if (!error)
  565.             { /* if !error */
  566.             curr_ch = curr_line[curr_column++];
  567.             token.tok_column = curr_column;
  568.             if (!case_sensitive)
  569.                 curr_ch = tolower(curr_ch);
  570.             init_sym_buff();
  571.             add_sym_buff(curr_ch);
  572.             if (!error)
  573.                 {
  574.                 if (isalpha(curr_ch) || curr_ch==Underscore)
  575.                     {
  576.                     do_identifier();
  577.                     token.tok_type = TOK_ID;
  578.                     }
  579.                 else
  580.                 if (isdigit(curr_ch))
  581.                     {
  582.                     do_number();
  583.                     token.tok_type = TOK_NCONST;
  584.                     }
  585.                 else
  586.                 if (curr_ch==Single_quote)
  587.                     {
  588.                     do_str_literal();
  589.                     token.tok_type    = TOK_SCONST;
  590.                     token.tok_value = (unsigned int) token.tok_string[0];
  591.                     }
  592.                 else
  593.                 if (curr_ch==Double_quote)
  594.                     {
  595.                     do_str_literal();
  596.                     token.tok_type = TOK_CCONST; /* the infamous "you'll never see this!" */
  597.                     }
  598.                 else
  599.                     { /* check for special symbols */
  600.                     do_special_syms();
  601.                     }
  602.                 }
  603.             } /* if !error */
  604.  
  605.         } while (!(error || got_token));
  606.  
  607. /*
  608. printf("get_token:typ=%d col=%d val=%ld str='%s' err=%d\n",
  609. token.tok_type,token.tok_column,token.tok_value,token.tok_string,error);
  610. */
  611.     return (error);
  612.     } /* get_token() */
  613.