home *** CD-ROM | disk | FTP | other *** search
/ Monster Media 1994 #1 / monster.zip / monster / OS2 / CPOSTSRC.ZIP / CTOK.C < prev    next >
Text File  |  1992-02-24  |  17KB  |  550 lines

  1. /*------------------------------------------------------------------
  2.  * ctok : C language tokenizer
  3.  *------------------------------------------------------------------
  4.  * 10-01-91 Patrick J. Mueller
  5.  *------------------------------------------------------------------*/
  6.  
  7. #include <stdio.h>
  8. #include <stdlib.h>
  9. #include <string.h>
  10. #include <ctype.h>
  11.  
  12. #include "ctok.h"
  13.  
  14. /*------------------------------------------------------------------
  15.  * is a character a valid character in a C identifier
  16.  *------------------------------------------------------------------*/
  17. #define isCsymbol(c) (isalnum(c) || ('_' == c))
  18.  
  19. /*------------------------------------------------------------------
  20.  * typedefs
  21.  *------------------------------------------------------------------*/
  22. typedef struct
  23.    {
  24.    int            eof;
  25.    char          *buffer;
  26.    long           bufferLen;
  27.    long           bufferInd;
  28.    long           fileOffs;
  29.    long           line;
  30.    int            unGetChar;
  31.    int            unGetReady;
  32.    long           tokOffs;
  33.    long           tokLen;
  34.    CTokRead       readFunc;
  35.    void          *readInfo;
  36.    char           ident[MAX_IDENT_LEN+1];
  37.    } CTokInfo;
  38.  
  39. /*-/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\-*/
  40. /*-\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/-*/
  41.  
  42. /*------------------------------------------------------------------
  43.  * get next char from file
  44.  *------------------------------------------------------------------*/
  45. void GetNextChar(
  46.    int         *c,
  47.    CTokInfo    *cti
  48.    )
  49.    {
  50.    cti->fileOffs++;
  51.  
  52.    /*---------------------------------------------------------------
  53.     * check for end of file
  54.     *---------------------------------------------------------------*/
  55.    if (cti->eof)
  56.       {
  57.       *c = EOF;
  58.       return;
  59.       }
  60.  
  61.    /*---------------------------------------------------------------
  62.     * check for a char in the unget holder
  63.     *---------------------------------------------------------------*/
  64.    if (cti->unGetReady)
  65.       {
  66.       cti->unGetReady = 0;
  67.       *c = cti->unGetChar;
  68.  
  69.       if ('\n' == *c)
  70.          cti->line++;
  71.       return;
  72.       }
  73.  
  74.    /*---------------------------------------------------------------
  75.     * see if we need to read another buffer
  76.     *---------------------------------------------------------------*/
  77.    if (cti->bufferInd == cti->bufferLen)
  78.       {
  79.       cti->bufferLen = cti->readFunc(cti->readInfo,&(cti->buffer));
  80.       cti->bufferInd = 0L;
  81.  
  82.       if (0L == cti->bufferLen)
  83.          {
  84.          *c = EOF;
  85.          cti->eof = 1;
  86.          return;
  87.          }
  88.       }
  89.  
  90.    /*---------------------------------------------------------------
  91.     * read character from buffer
  92.     *---------------------------------------------------------------*/
  93.    *c = cti->buffer[cti->bufferInd++];
  94.  
  95.    if ('\n' == *c)
  96.       cti->line++;
  97.  
  98.    return;
  99.    }
  100.  
  101. /*------------------------------------------------------------------
  102.  * put back last char from file
  103.  *------------------------------------------------------------------*/
  104. void UnGetNextChar(
  105.    int          c,
  106.    CTokInfo    *cti
  107.    )
  108.    {
  109.    cti->fileOffs--;
  110.  
  111.    cti->unGetChar  = c;
  112.    cti->unGetReady = 1;
  113.  
  114.    if ('\n' == c)
  115.       cti->line--;
  116.    }
  117.  
  118. /*-/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\-*/
  119. /*-\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/-*/
  120.  
  121. /*------------------------------------------------------------------
  122.  * read a C character constant or string
  123.  *------------------------------------------------------------------*/
  124. static void ReadString(
  125.    CTokInfo    *cti,
  126.    int          c
  127.    )
  128.    {
  129.    int stop;
  130.  
  131.    /*---------------------------------------------------------------
  132.     * the character passed in is ' or ", and it is the character that
  133.     * signifies the end of the string
  134.     *---------------------------------------------------------------*/
  135.    stop = c;
  136.  
  137.    /*---------------------------------------------------------------
  138.     * keep going until we hit our stop character
  139.     *---------------------------------------------------------------*/
  140.    GetNextChar(&c,cti);
  141.    while (stop != c)
  142.       {
  143.       /*------------------------------------------------------------
  144.        * for a \, inhale next character
  145.        *------------------------------------------------------------*/
  146.       if ('\\' == c)
  147.          GetNextChar(&c,cti);
  148.  
  149.       /*------------------------------------------------------------
  150.        * for EOF, break
  151.        *------------------------------------------------------------*/
  152.       if (EOF == c)
  153.          break;
  154.  
  155.       GetNextChar(&c,cti);
  156.       }
  157.  
  158.    return;
  159.    }
  160.  
  161. /*------------------------------------------------------------------
  162.  * read a C comment
  163.  *------------------------------------------------------------------*/
  164. static void ReadComment(
  165.    CTokInfo    *cti
  166.    )
  167.    {
  168.    int c;
  169.  
  170.    /*---------------------------------------------------------------
  171.     * loop until end of file (or return in middle)
  172.     *---------------------------------------------------------------*/
  173.    GetNextChar(&c,cti);
  174.    while (EOF != c)
  175.       {
  176.  
  177.       /*------------------------------------------------------------
  178.        * if not *, just get next character
  179.        *------------------------------------------------------------*/
  180.       if ('*' != c)
  181.          GetNextChar(&c,cti);
  182.  
  183.       /*------------------------------------------------------------
  184.        * got a * - see if next is /
  185.        *------------------------------------------------------------*/
  186.       else
  187.          {
  188.          /*---------------------------------------------------------
  189.           * if next is /, return
  190.           *---------------------------------------------------------*/
  191.          GetNextChar(&c,cti);
  192.          if ('/'  == c)
  193.             return;
  194.          }
  195.  
  196.       }
  197.  
  198.    return;
  199.    }
  200.  
  201. /*------------------------------------------------------------------
  202.  * read a C++ style comment
  203.  *------------------------------------------------------------------*/
  204. static void ReadCppComment(
  205.    CTokInfo    *cti
  206.    )
  207.    {
  208.    int c;
  209.  
  210.    /*---------------------------------------------------------------
  211.     * loop until end of line or end of file
  212.     *---------------------------------------------------------------*/
  213.    GetNextChar(&c,cti);
  214.  
  215.    while ((EOF != c) && ('\n' != c))
  216.       GetNextChar(&c,cti);
  217.  
  218.    UnGetNextChar(c,cti);
  219.    return;
  220.    }
  221.  
  222. /*------------------------------------------------------------------
  223.  * read an identifier
  224.  *------------------------------------------------------------------*/
  225. static void ReadIdent(
  226.    CTokInfo    *cti,
  227.    int          c
  228.    )
  229.    {
  230.    int identLen;
  231.  
  232.    /*---------------------------------------------------------------
  233.     * initialize length and stick first char in
  234.     *---------------------------------------------------------------*/
  235.    identLen = 0;
  236.    cti->ident[identLen++] = (char) c;
  237.  
  238.    /*---------------------------------------------------------------
  239.     * while still a valid symbol character ...
  240.     *---------------------------------------------------------------*/
  241.    GetNextChar(&c,cti);
  242.    while (isCsymbol(c))
  243.       {
  244.       /*------------------------------------------------------------
  245.        * make sure we got enough room, then stick it in
  246.        *------------------------------------------------------------*/
  247.       if (identLen < MAX_IDENT_LEN)
  248.          cti->ident[identLen++] = (char) c;
  249.  
  250.       GetNextChar(&c,cti);
  251.       }
  252.  
  253.    /*---------------------------------------------------------------
  254.     * finish up identifier, put last character back
  255.     *---------------------------------------------------------------*/
  256.    cti->ident[identLen] = '\0';
  257.    UnGetNextChar(c,cti);
  258.    }
  259.  
  260. /*------------------------------------------------------------------
  261.  * read a number
  262.  *------------------------------------------------------------------*/
  263. static void ReadNumber(
  264.    CTokInfo    *cti,
  265.    int          c
  266.    )
  267.    {
  268.  
  269.    /*---------------------------------------------------------------
  270.     * while still a valid number character ...
  271.     *---------------------------------------------------------------*/
  272.    GetNextChar(&c,cti);
  273.    while (isalnum(c))
  274.       GetNextChar(&c,cti);
  275.  
  276.    /*---------------------------------------------------------------
  277.     * put last character back
  278.     *---------------------------------------------------------------*/
  279.    UnGetNextChar(c,cti);
  280.    }
  281.  
  282. /*------------------------------------------------------------------
  283.  * read a preprocessor statement
  284.  *------------------------------------------------------------------*/
  285. static void ReadPreprocessor(
  286.    CTokInfo    *cti
  287.    )
  288.    {
  289.    int c;
  290.  
  291.    /*---------------------------------------------------------------
  292.     * loop until end of file (or return in middle)
  293.     *---------------------------------------------------------------*/
  294.    GetNextChar(&c,cti);
  295.    while (EOF != c)
  296.       {
  297.       /*------------------------------------------------------------
  298.        * if we found a newline, leave
  299.        *------------------------------------------------------------*/
  300.       if ('\n' == c)
  301.          {
  302.          UnGetNextChar(c,cti);
  303.          return;
  304.          }
  305.  
  306.       /*------------------------------------------------------------
  307.        * if we got anything but a \, eat it
  308.        *------------------------------------------------------------*/
  309.       else if ('\\' != c)
  310.          GetNextChar(&c,cti);
  311.  
  312.       /*------------------------------------------------------------
  313.        * got a \ - see if next is \n
  314.        *------------------------------------------------------------*/
  315.       else
  316.          {
  317.          /*---------------------------------------------------------
  318.           * if next isn't \n, start at top of loop
  319.           *---------------------------------------------------------*/
  320.          GetNextChar(&c,cti);
  321.  
  322.          /*---------------------------------------------------------
  323.           * skip over white space first
  324.           *---------------------------------------------------------*/
  325.          while (isspace(c) && ('\n' != c))
  326.             GetNextChar(&c,cti);
  327.  
  328.          if ('\n' != c)
  329.             continue;
  330.  
  331.          /*---------------------------------------------------------
  332.           * if it is a \n, read next char and continue
  333.           *---------------------------------------------------------*/
  334.          GetNextChar(&c,cti);
  335.          continue;
  336.          }
  337.  
  338.       }
  339.  
  340.    return;
  341.    }
  342.  
  343. /*-/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\-*/
  344. /*-\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/-*/
  345.  
  346. /*------------------------------------------------------------------
  347.  * tokenizer
  348.  *------------------------------------------------------------------*/
  349.  
  350. static int GetToken(
  351.    CTokInfo    *cti
  352.    )
  353.    {
  354.    int           c;
  355.    int           type;
  356.    unsigned long offsStart;
  357.  
  358.    /*---------------------------------------------------------------
  359.     * read next character
  360.     *---------------------------------------------------------------*/
  361.    GetNextChar(&c,cti);
  362.  
  363.    /*---------------------------------------------------------------
  364.     * skip white space
  365.     *---------------------------------------------------------------*/
  366.    while (isspace(c))
  367.       GetNextChar(&c,cti);
  368.  
  369.    /*---------------------------------------------------------------
  370.     * save starting offset
  371.     *---------------------------------------------------------------*/
  372.    offsStart = cti->fileOffs;
  373.  
  374.    /*---------------------------------------------------------------
  375.     * empty identifier
  376.     *---------------------------------------------------------------*/
  377.    memset(cti->ident,'\0',sizeof(cti->ident));
  378.  
  379.    /*---------------------------------------------------------------
  380.     * big switch on it's value
  381.     *---------------------------------------------------------------*/
  382.    switch(c)
  383.       {
  384.       /*------------------------------------------------------------
  385.        * check for end of file
  386.        *------------------------------------------------------------*/
  387.       case EOF:
  388.          type = TOKEN_EOF;
  389.          break;
  390.  
  391.       /*------------------------------------------------------------
  392.        * for pound sign, read preprocessor directive
  393.        *------------------------------------------------------------*/
  394.       case '#':
  395.          ReadPreprocessor(cti);
  396.          type = TOKEN_PREPROC;
  397.          break;
  398.  
  399.       /*------------------------------------------------------------
  400.        * single or double quote
  401.        *------------------------------------------------------------*/
  402.       case '\'':
  403.       case '"':
  404.          ReadString(cti,c);
  405.          type = TOKEN_STRING;
  406.          break;
  407.  
  408.       /*------------------------------------------------------------
  409.        * start of comment?
  410.        *------------------------------------------------------------*/
  411.       case '/':
  412.          /*---------------------------------------------------------
  413.           * get next char - if *, read to end of comment
  414.           *---------------------------------------------------------*/
  415.          GetNextChar(&c,cti);
  416.          if ('*' == c)
  417.             {
  418.             ReadComment(cti);
  419.             type = TOKEN_COMMENT;
  420.             }
  421.  
  422.          /*---------------------------------------------------------
  423.           * see if it's a C++ style comment
  424.           *---------------------------------------------------------*/
  425.          else if ('/' == c)
  426.             {
  427.             ReadCppComment(cti);
  428.             type = TOKEN_COMMENT;
  429.             }
  430.  
  431.          /*---------------------------------------------------------
  432.           * otherwise it's just a plain /
  433.           *---------------------------------------------------------*/
  434.          else
  435.             {
  436.             UnGetNextChar(c,cti);
  437.             type = TOKEN_OPER;
  438.             }
  439.  
  440.          break;
  441.  
  442.       /*------------------------------------------------------------
  443.        * everything else - identifiers and punctuation
  444.        *------------------------------------------------------------*/
  445.       default:
  446.          if (isCsymbol(c) && !isdigit(c))
  447.             {
  448.             ReadIdent(cti,c);
  449.             type = TOKEN_IDENT;
  450.             }
  451.  
  452.          else if (isdigit(c))
  453.             {
  454.             ReadNumber(cti,c);
  455.             type = TOKEN_NUMBER;
  456.             }
  457.  
  458.          /*---------------------------------------------------------
  459.           * anything else
  460.           *---------------------------------------------------------*/
  461.          else
  462.             {
  463.             type = TOKEN_OPER;
  464.             cti->ident[0] = (char) c;
  465.             }
  466.  
  467.          break;
  468.       }
  469.  
  470.    cti->tokOffs = offsStart;
  471.    cti->tokLen  = cti->fileOffs - offsStart + 1;
  472.    return(type);
  473.    }
  474.  
  475. /*-/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\-*/
  476. /*-\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/-*/
  477.  
  478.  
  479. /*------------------------------------------------------------------
  480.  * Initializer
  481.  *------------------------------------------------------------------*/
  482. void *CTokInit(
  483.    CTokRead  readFunc,
  484.    void     *readInfo
  485.    )
  486.    {
  487.    CTokInfo *cti;
  488.  
  489.    /*---------------------------------------------------------------
  490.     * allocate space for structure
  491.     *---------------------------------------------------------------*/
  492.    cti = malloc(sizeof(CTokInfo));
  493.    if (NULL == cti)
  494.       return NULL;
  495.  
  496.    /*---------------------------------------------------------------
  497.     * initialize structure
  498.     *---------------------------------------------------------------*/
  499.    cti->eof         = 0;
  500.    cti->buffer      = NULL;
  501.    cti->bufferLen   = 0L;
  502.    cti->bufferInd   = 0L;
  503.    cti->fileOffs    = -1L;
  504.    cti->line        = 1;
  505.    cti->unGetChar   = '\0';
  506.    cti->unGetReady  = 0;
  507.    cti->tokOffs     = 0L;
  508.    cti->tokLen      = 0L;
  509.    cti->readFunc    = readFunc;
  510.    cti->readInfo    = readInfo;
  511.    memset(cti->ident,'\0',sizeof(cti->ident));
  512.  
  513.    return cti;
  514.    }
  515.  
  516. /*-/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\-*/
  517. /*-\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/-*/
  518.  
  519. /*------------------------------------------------------------------
  520.  * Terminator
  521.  *------------------------------------------------------------------*/
  522. void CTokTerm(
  523.    void *handle
  524.    )
  525.    {
  526.    free(handle);
  527.    }
  528.  
  529. /*-/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\-*/
  530. /*-\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/-*/
  531.  
  532. /*------------------------------------------------------------------
  533.  * Tokenizer
  534.  *------------------------------------------------------------------*/
  535. void CTokGet(
  536.    void     *handle,
  537.    Token    *token
  538.    )
  539.    {
  540.    CTokInfo *cti;
  541.  
  542.    cti = handle;
  543.  
  544.    token->type  = GetToken(cti);
  545.    token->offs  = cti->tokOffs;
  546.    token->len   = cti->tokLen;
  547.    token->ident = cti->ident;
  548.    token->line  = cti->line;
  549.    }
  550.