home *** CD-ROM | disk | FTP | other *** search
- /* 9th April, 1993 Mayan Moudgill
- * Basically is given a file. If asked it will try and scan one of the
- * following tokens from the file.
- * strings: "([^"\\\n]|(\.))*"
- * identifier: [A-Za-z_0-9]+
- * integer: [-+]?[0-9]+
- * character: .
- * Each of these returns a Token.
- * It can also accept a mark, which returns a Mark, and a reject which
- * when given a Mark, rolls back the state to the Mark, and starts matching
- * from that point onwards. Also, for convenience it can rollback the
- * last token. Also, it can match a character and a character-string.
- * (these are equivalent to trying to scan a character/string,
- * and rolling back if the character/string does not exactly match the
- * argument).
- *
- * The other facility that the function provides is that it keeps track
- * of the number of lines read in.
- *
- * I can think of at least two ways of implementing the tokenizing
- * and rollback mechanisms: reading files, and mmap'ing them. This
- * implementation uses mmap.
- */
-
- #include <osfcn.h>
- #include <stdlib.h>
- #include <iostream.h>
-
- #include "Token.H"
-
- /*
-  * Scan: tokenizer over a file, with mark/rollback support and a running
-  * count of the newlines consumed so far.
-  *
-  * Scanning is tentative.  _start() copies the committed position
-  * (_at/_nl) into the tentative position (_tat/_tnl); _get() and
-  * _backup() move only the tentative position; _commit() publishes the
-  * tentative position and remembers the previous committed one in
-  * _oat/_onl so that back() can undo the most recent commit.
-  *
-  * NOTE(review): the class holds a file descriptor and a pointer to a
-  * mapped region and declares a destructor.  If ~Scan() releases them,
-  * an implicit copy of a Scan would release them twice -- consider
-  * disabling copy construction/assignment once callers are confirmed
-  * not to copy.
-  */
- class Scan {
- public:
-     /*
-      * A saved scanner position (line count + file pointer).  The fields
-      * are private and Scan is a friend, so clients can only obtain a
-      * Mark from Scan::mark() and hand it back to Scan::back().
-      */
-     class Mark {
-         friend class Scan;
-     private:
-         int    _nl;    // value of Scan::_nl when the mark was taken
-         char * _at;    // value of Scan::_at when the mark was taken
-     };
- private:
-     char   _name[256]; // file name
-     int    _fd;        // file descriptor
-     char * _file;      // the pointer to the beginning of the mmap'd region
-     long   _size;      // the file size
-     char * _end;       // end of file; _get() treats it as one past the
-                        // last readable character
-
-     int    _close;     // has it not been opened?  (_start() reports
-                        // "nothing to scan" while this is non-zero)
-     int    _eof;       // seen eof?
-     char * _at;        // file pointer (committed position)
-     int    _nl;        // new lines read in (committed count)
-     char * _oat;       // previous file pointer (for rollback)
-     int    _onl;       // previous new lines
-
-     // _tat/_tnl: tentative position and newline count, advanced by
-     // _get(), rewound by _backup(), published by _commit().
-     // _wat/_wnl: set together with _tat/_tnl in _start() and not touched
-     // by any inline member here -- presumably the start of the token
-     // being scanned (TODO confirm against the out-of-line code).
-     char * _wat;
-     char * _tat;
-     int    _wnl;
-     int    _tnl;
-
- private:
-     /*
-      * Begin a tentative scan from the committed position.
-      * Returns non-zero when there is nothing to scan: either the
-      * scanner is flagged as closed or the committed position is already
-      * at _end.  _close is tested first so that the position pointers
-      * are not compared when no file is open.
-      */
-     int _start()
-     {
-         _wat = _tat = _at;
-         _wnl = _tnl = _nl;
-         return _close || _at == _end;
-     }
-     /*
-      * Read the next character at the tentative position.
-      * At _end it returns EOF and does NOT advance; otherwise it returns
-      * the character, advances _tat, and counts a newline in _tnl.
-      *
-      * NOTE(review): the character is converted through plain char, so
-      * where char is signed a 0xFF byte yields a negative value that can
-      * be indistinguishable from EOF (if EOF is -1, as is usual).
-      */
-     int _get()
-     {
-         int c;
-         if( _tat == _end ) {
-             c = EOF;
-         }
-         else {
-             c = *_tat++;
-             if( c == '\n' ) {
-                 _tnl++;
-             }
-         }
-         return c;
-     }
-     /*
-      * Step the tentative position back by one character, undoing the
-      * newline count if the character stepped over is a newline.
-      *
-      * There is no bounds check, and _get() does not advance when it
-      * returns EOF, so this must only be called to undo a _get() that
-      * actually returned a character.
-      */
-     void _backup()
-     {
-         _tat--;
-         if( *_tat == '\n') {
-             _tnl--;
-         }
-     }
-     /*
-      * Publish the tentative position as the committed one, saving the
-      * old committed position so that back() can restore it.
-      */
-     void _commit()
-     {
-         _oat = _at;
-         _onl = _nl;
-         _at = _tat;
-         _nl = _tnl;
-     }
-     int _space();
- public:
-     Scan();
-     Scan(char * name);
-     ~Scan();
-     /* Number of newlines consumed up to the committed position. */
-     int line() const
-         { return _nl; }
-     /* Capture the committed position so it can later be passed to back(). */
-     Mark mark() const
-     {
-         Mark m;
-         m._nl = _nl;
-         m._at = _at;
-         return m;
-     }
-     /*
-      * Roll the scanner back to a previously taken mark.  The "previous"
-      * position is reset to the mark as well, so a following back() with
-      * no argument leaves the scanner at the mark.
-      * The mark is only read, hence the const reference.
-      */
-     void back( const Mark& mark)
-     {
-         _onl = _nl = mark._nl;
-         _oat = _at = mark._at;
-     }
-     /*
-      * Undo the most recent commit by restoring the saved previous
-      * position.  The saved position itself is left unchanged, so calling
-      * this again without an intervening commit has no further effect.
-      */
-     void back()
-     {
-         _nl = _onl;
-         _at = _oat;
-     }
-     /* now for the actual scan routines */
-     /* scan_char & scan_string are special cases */
-     int match(char c, Token& result);
-     int match(char *, Token&);
-
-     int number(Token&);
-     int string(Token&);
-     int identifier(Token&);
-     int token(Token&);
-     int character(Token& result);
-     int eof();
- };
-