/* This file is part of the KDE libraries
Copyright (C) 1997 Martin Jones (mjones@kde.org)
(C) 1997 Torben Weis (weis@kde.org)
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
License as published by the Free Software Foundation; either
version 2 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public License
along with this library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.
*/
//-----------------------------------------------------------------------------
//
// KDE HTML Widget
//
#ifndef HTMLTOKEN_H
#define HTMLTOKEN_H
class StringTokenizer;
class HTMLTokenizer;
#include <qlist.h>
#include <qstrlist.h>
#include <qarray.h>
#include "jscript.h"
// Every tag as deliverd by HTMLTokenizer starts with TAG_ESCAPE. This way
// you can devide between tags and words.
#define TAG_ESCAPE 13
// The count of spaces used for each tab.
#define TAB_SIZE 8
typedef char * TokenPtr;
//-----------------------------------------------------------------------------
class BlockingToken
{
public:
enum TokenType { Table, FrameSet, Script, Cell };
BlockingToken( TokenType tt, TokenPtr t )
{ ttype = tt; tok = t; }
TokenPtr token()
{ return tok; }
const char *tokenName();
protected:
TokenType ttype;
TokenPtr tok;
};
//-----------------------------------------------------------------------------
class HTMLTokenBuffer
{
public:
TokenPtr first()
{ return (TokenPtr) data; }
protected:
char data[1];
};
class HTMLTokenizer
{
public:
HTMLTokenizer( KHTMLWidget *_widget = 0L );
~HTMLTokenizer();
void begin();
void write( const char * );
void end();
char* nextToken();
bool hasMoreTokens();
void first();
protected:
void reset();
void addPending();
void appendToken( const char *t, int len );
void appendTokenBuffer( int min_size);
void nextTokenBuffer(); // Move curr to next tokenBuffer
protected:
// Internal buffers
///////////////////
char *buffer;
char *dest;
// the size of buffer
int size;
// Token List
/////////////
QList<HTMLTokenBuffer> tokenBufferList;
TokenPtr last; // Last token appended
TokenPtr next; // Token written next
int tokenBufferSizeRemaining; // The size remaining in the buffer written to
TokenPtr curr; // Token read next
unsigned int tokenBufferCurrIndex; // Index of HTMLTokenBuffer used by next read.
// Tokenizer flags
//////////////////
// are we in a html tag
bool tag;
// are we in quotes within a html tag
int tquote;
typedef enum
{
NonePending = 0,
SpacePending,
LFPending,
TabPending
} HTMLPendingType;
// To avoid multiple spaces
HTMLPendingType pending;
typedef enum
{
NoneDiscard = 0,
SpaceDiscard,
LFDiscard
} HTMLDiscardType;
// Discard line breaks immediately after start-tags
// Discard spaces after '=' within tags
HTMLDiscardType discard;
// Discard the LF part of CRLF sequence
bool skipLF;
// Flag to say that we have the '<' but not the character following it.
// Used to decide whether we will get a <TAG> or </TAG>
// In case of a </TAG> we ignore pending LFs.
// In case of a <TAG> we add any pending LF as a space.
// If the character following is not '/', 'a..z', 'A..Z' or '!'
// the tag is inserted as text
bool startTag;
// Are we in a <title> ... </title> block
bool title;
// Are we in a <pre> ... </pre> block
bool pre;
// if 'pre == true' we track in which column we are
int prePos;
// Are we in a <script> ... </script> block
bool script;
// Are we in a <style> ... </style> block
bool style;
// Are we in a <select> ... </select> block
bool select;
// Are we in a &... character entity description?
bool charEntity;
// Area we in a <!-- comment --> block
bool comment;
// Are we in a <textarea> ... </textarea> block
bool textarea;
// Used to store the code of a srcipting sequence
char *scriptCode;
// Size of the script sequenze stored in @ref #scriptCode
int scriptCodeSize;
// Maximal size that can be stored in @ref #scriptCode
int scriptCodeMaxSize;
// Stores characters if we are scanning for a string like "</script>"
char searchBuffer[ 10 ];
// Counts where we are in the string we are scanning for
int searchCount;
// The string we are searching for
const char *searchFor;
KHTMLWidget *widget;
/**
* This pointer is 0L until used. The @ref KHTMLWidget has an instance of
* this class for us. We ask for it when we see some JavaScript stuff for
* the first time.
*/
JSEnvironment* jsEnvironment;
// These are tokens for which we are awaiting ending tokens
QList<BlockingToken> blocking;
};
inline void HTMLTokenizer::appendToken( const char *t, int len )
{
if ( len < 1 )
return;
if (len >= tokenBufferSizeRemaining)
{
// We need a new buffer
appendTokenBuffer( len);
}
last = next; // Last points to the start of the token we are going to append
tokenBufferSizeRemaining -= len+1; // One for the null-termination
while (len--)
{
*next++ = *t++;
}
*next++ = '\0';
}
inline char* HTMLTokenizer::nextToken()
{
if (!curr)
return NULL;
char *t = (char *) curr;
curr += strlen(curr)+1;
if ((curr != next) && (*curr == '\0'))
{
// End of HTMLTokenBuffer, go to next buffer.
nextTokenBuffer();
}
return t;
}
inline bool HTMLTokenizer::hasMoreTokens()
{
if ( !blocking.isEmpty() &&
blocking.getFirst()->token() == curr )
{
return false;
}
return ( ( curr != 0 ) && (curr != next) );
}
//-----------------------------------------------------------------------------
class StringTokenizer
{
public:
StringTokenizer();
~StringTokenizer();
void tokenize( const char *, const char * );
const char* nextToken();
bool hasMoreTokens() { return ( pos != 0 ); }
protected:
char *pos;
char *end;
char *buffer;
int bufLen;
};
#endif // HTMLTOKEN
Documentation generated by root@darkstar.lst.de on Wed Sep 8 17:38:46 CEST 1999