home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
The World of Computer Software
/
World_Of_Computer_Software-02-387-Vol-3of3.iso
/
p
/
plbin.zip
/
pl
/
src
/
pl-read.c
< prev
next >
Wrap
C/C++ Source or Header
|
1993-02-23
|
36KB
|
1,507 lines
/* pl-read.c,v 1.6 1993/02/23 13:16:44 jan Exp
Copyright (c) 1990 Jan Wielemaker. All rights reserved.
See ../LICENCE to find out about your rights.
jan@swi.psy.uva.nl
Purpose: read/1, 2
*/
#include "pl-incl.h"
#include "pl-ctype.h"
#include <math.h>
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
This module defines the Prolog parser. Reading a term takes two passes:
* Reading the term into memory, deleting multiple blanks, comments
etc.
* Parsing this string into a Prolog term.
The separation has two reasons: we can call the first part separately
and insert the read strings in the history and we can produce better
error messages as the parsed part of the source is available.
The raw reading pass is quite tricky as PCE requires us to allow for
callbacks from C during this process and the callback might invoke
another read. Notable raw reading needs to be studied studied once more
as it takes about 30% of the entire compilation time and looks
promissing for optimisations. It also could be made a bit more
readable.
This module is considerably faster when compiled with GCC, using the
-finline-functions option.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
forwards void startRead P((void));
forwards void stopRead P((void));
forwards void errorWarning P((char *));
forwards void singletonWarning P((Atom *, int));
forwards void clearBuffer P((void));
forwards void addToBuffer P((char));
forwards void delBuffer P((void));
forwards void extendBeep P((void));
forwards void extendDeleteEscape P((void));
forwards void extendDeleteEof P((void));
forwards void extendReprint P((bool));
forwards Char getchr P((void));
forwards char * raw_read2 P((void));
forwards char * raw_read P((void));
typedef struct token * Token;
typedef struct variable * Variable;
struct token
{ int type; /* type of token */
union
{ word prolog; /* a Prolog value */
char character; /* a punctuation character (T_PUNCTUATION) */
Variable variable; /* a variable record (T_VARIABLE) */
} value; /* value of token */
};
struct variable
{ Word address; /* address of variable */
char * name; /* name of variable */
int times; /* number of occurences */
Variable next; /* next of chain */
};
#define T_FUNCTOR 0 /* name of a functor (atom, followed by '(') */
#define T_NAME 1 /* ordinary name */
#define T_VARIABLE 2 /* variable name */
#define T_VOID 3 /* void variable */
#define T_REAL 4 /* realing point number */
#define T_INTEGER 5 /* integer */
#define T_STRING 6 /* "string" */
#define T_PUNCTUATION 7 /* punctuation character */
#define T_FULLSTOP 8 /* Prolog end of clause */
extern int Input; /* current input stream (from pl-file.c) */
static char *here; /* current character */
static char *base; /* base of clause */
static char *token_start; /* start of most recent read token */
static struct token token; /* current token */
static bool unget = FALSE; /* unget_token() */
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
The reading function (raw_read()) can be called recursively via the
notifier when running under notifier based packages (like O_PCE). To
avoid corruption of the database we push the read buffer rb on a stack
and pop in back when finished. See raw_read() and raw_read2().
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
#define RBSIZE 512 /* initial size of read buffer */
#define MAX_READ_NESTING 5 /* nesting of read (O_PCE only) */
static
struct read_buffer
{ int size; /* current size of read buffer */
int left; /* left space in read buffer */
char *base; /* base of read buffer */
char *here; /* current position in read buffer */
int stream; /* stream we are reading from */
FILE *fd; /* file descriptor we are reading from */
bool doExtend; /* extension mode on? */
} rb;
#if O_PCE
static struct read_buffer rb_stack[MAX_READ_NESTING];
int read_nesting = 0; /* current nesting level */
#endif /* O_PCE */
void
resetRead()
{
#if O_PCE
read_nesting = 0;
#endif
}
static
void
startRead()
{
#if O_PCE
if (read_nesting >= MAX_READ_NESTING)
{ warning("Read stack too deeply nested");
pl_abort();
}
rb_stack[read_nesting++] = rb;
rb = rb_stack[read_nesting];
#endif /* O_PCE */
rb.doExtend = (Input == 0 && status.notty == FALSE);
rb.stream = Input;
rb.fd = checkInput(rb.stream);
source_file_name = currentStreamName();
}
static void
stopRead()
{
#if O_PCE
rb_stack[read_nesting] = rb;
rb = rb_stack[--read_nesting];
if (read_nesting < 0)
fatalError("Read stack underflow???");
if (read_nesting > 0)
{ rb.fd = checkInput(rb.stream);
/*source_file_name = currentStreamName();*/
}
#endif /* O_PCE */
}
/********************************
* ERROR HANDLING *
*********************************/
#define syntaxError(what) { errorWarning(what); fail; }
static void
errorWarning(what)
char *what;
{ char c = *token_start;
if ( !ReadingSource ) /* not reading from a file */
{ fprintf(stderr, "\n[WARNING: Syntax error: %s \n", what);
*token_start = EOS;
fprintf(stderr, "%s\n** here **\n", base);
if (c != EOS)
{ *token_start = c;
fprintf(stderr, "%s]\n", token_start);
}
} else
{ char *s;
word goal = globalFunctor(FUNCTOR_exception3);
word arg;
for(s = base; s < token_start; s++ )
if ( *s == '\n' )
source_line_no++;
unifyAtomic(argTermP(goal, 0), ATOM_syntax_error);
unifyFunctor(argTermP(goal, 1), FUNCTOR_syntax_error3);
arg = argTerm(goal, 1);
unifyAtomic(argTermP(arg, 0), source_file_name);
unifyAtomic(argTermP(arg, 1), consNum(source_line_no));
unifyAtomic(argTermP(arg, 2), lookupAtom(what));
if ( callGoal(MODULE_user, goal, FALSE) == FALSE )
warning("Syntax error: %s", what);
}
}
static void
singletonWarning(vars, nvars)
Atom *vars;
int nvars;
{ word goal = globalFunctor(FUNCTOR_exception3);
word arg;
Word a;
int n;
unifyAtomic(argTermP(goal, 0), ATOM_singleton);
unifyFunctor(argTermP(goal, 1), FUNCTOR_singleton3);
arg = argTerm(goal, 1);
unifyAtomic(argTermP(arg, 0), source_file_name);
unifyAtomic(argTermP(arg, 1), consNum(source_line_no));
a = argTermP(arg, 2);
for(n=0; n<nvars; n++)
{ unifyFunctor(a, FUNCTOR_dot2);
unifyAtomic(argTermP(*a, 0), vars[n]);
a = argTermP(*a, 1);
}
unifyAtomic(a, ATOM_nil);
if ( callGoal(MODULE_user, goal, FALSE) == FALSE )
{ char buf[LINESIZ];
buf[0] = EOS;
for(n=0; n<nvars; n++)
{ if ( n > 0 )
strcat(buf, ", ");
strcat(buf, stringAtom(vars[n]));
}
warning("Singleton variables: %s", buf);
}
}
/********************************
* RAW READING *
*********************************/
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Scan the input, give prompts when necessary and return a char * holding
a stripped version of the next term. Contigeous white space is mapped
on a single space, block and % ... \n comment is deleted. Memory is
claimed automatically en enlarged if necessary.
Earlier versions used to local stack for building the term. This does
not work with O_PCE as we might be called back via the notifier while
reading.
(char *) NULL is returned on a syntax error.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static void
clearBuffer()
{ if (rb.size == 0)
{ if ((rb.base = Malloc(RBSIZE)) == (char *) NULL)
fatalError("%s", OsError());
rb.size = RBSIZE;
}
SECURE( if ( rb.base == 0 ) fatalError("read/1: nesting=%d, size=%d",
read_nesting, rb.size) );
rb.left = rb.size;
base = rb.here = rb.base;
DEBUG(8, printf("Cleared read buffer.rb at %ld, base at %ld\n", &rb, rb.base));
}
#if PROTO
static void
addToBuffer(
register char c)
#else
static void
addToBuffer(c)
register char c;
#endif
{ if (rb.left-- == 0)
{ if ((rb.base = Realloc(rb.base, rb.size * 2)) == (char *)NULL)
fatalError("%s", OsError());
DEBUG(8, printf("Reallocated read buffer at %ld\n", rb.base));
base = rb.base;
rb.here = rb.base + rb.size;
rb.left = rb.size - 1;
rb.size *= 2;
}
*rb.here++ = c;
}
static void
delBuffer()
{ if ( rb.here > rb.base )
{ rb.here--;
rb.left++;
}
}
#if O_EXTEND_ATOMS
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Primitive functions to print and delete characters for atom-completion.
Should be abstracted from a bit and incorporated in the operating system
interface. As this model of atom-completion is unlikely to work on a
non-Unix machine anyway this will do for the moment.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static void
extendBeep()
{ if ( status.beep == TRUE )
{ putchar(07); /* ^G: the bel */
fflush(stdout);
}
}
#if O_LINE_EDIT /* otherwise, define in md.h */
#define DEL_ESC "\b\b \b\b"
#define DEL_EOF "\b\b \b\b"
#endif /* O_LINE_EDIT */
static void
extendDeleteEscape()
{ printf(DEL_ESC);
fflush(stdout);
}
static void
extendDeleteEof()
{ printf(DEL_EOF);
fflush(stdout);
}
static void
extendReprint(reprint)
bool reprint;
{ ttybuf buf;
char *s, *start = rb.here - 1;
for(; *start != '\n' && start >= rb.base; start-- ) ;
start++;
PushTty(&buf, reprint ? TTY_APPEND : TTY_RETYPE);
for( s=start; s < rb.here; s++ )
PretendTyped(*s);
PopTty(&buf);
while(rb.here > start)
delBuffer();
fflush(stdout);
}
#endif /* O_EXTEND_ATOMS */
static Char
getchr()
{ register Char c;
if (rb.fd == (FILE *)NULL)
{ c = Get0();
base = rb.base;
} else if ((c = (Char) Getc(rb.fd)) == '\n')
newLineInput();
return c;
}
#define ensure_space(c) { if ( something_read && \
(c == '\n'|| !isBlank(rb.here[-1])) ) \
addToBuffer(c); \
}
#define set_start_line { if ( !something_read ) \
{ source_file_name = currentStreamName(); \
source_line_no = currentInputLine(); \
something_read++; \
} \
}
#define rawSyntaxError(what) { addToBuffer(EOS); \
base = rb.base, token_start = rb.here-1; \
syntaxError(what); \
}
static char *
raw_read2()
{ register Char c;
bool something_read = FALSE;
int newlines;
clearBuffer(); /* clear input buffer */
prompt(FALSE); /* give prompt */
source_line_no = -1;
for(;;)
{ c = getchr();
DEBUG(3, if ( Input == 0 ) printf("getchr() -> %d (%c)\n", c, c));
DEBUG(3, if ( Input == 0 ) printf("here = %d, base = %d", rb.here, rb.base));
if ( c == ttytab.tab.c_cc[ VEOF ] ) c = EOF ;
switch(c)
{ case EOF:
if (seeingString()) /* do not require '. ' when */
{ addToBuffer(' '); /* reading from a string */
addToBuffer('.');
addToBuffer(' ');
addToBuffer(EOS);
return rb.base;
}
if (something_read)
{
#if O_EXTEND_ATOMS
if ( rb.doExtend == TRUE )
{ char *a;
addToBuffer(EOS); /* allocates if need be */
delBuffer();
a = rb.here - 1;
for( ;a >= rb.base && isAlpha(*a); a--) ;
a++;
extendDeleteEof();
if ( a >= rb.here || !isLower(*a) )
{ extendReprint(FALSE);
extendBeep();
break;
}
Put('\n');
extendAlternatives(a);
extendReprint(TRUE);
break;
}
#endif /* O_EXTEND_ATOMS */
rawSyntaxError("Unexpected end of file");
}
e_o_f:
strcpy(rb.base, "end_of_file. ");
return rb.base;
case '*': if ( rb.here-rb.base >= 1 && rb.here[-1] == '/' )
{ register char last;
int level = 1;
rb.here--, rb.left++; /* delete read '/' */
if ((last = getchr()) == EOF)
rawSyntaxError("End of file in ``/* ... */'' comment");
for(;;)
{ switch(c = getchr())
{ case EOF:
rawSyntaxError("End of file in ``/* ... */'' comment");
case '*':
if ( last == '/' )
level++;
break;
case '/':
if ( last == '*' && --level == 0 )
{ c = ' ';
goto case_default; /* hack */
}
break;
case '\n':
if ( something_read )
addToBuffer(c);
}
last = c;
}
}
set_start_line;
addToBuffer(c);
break;
case '%': while((c=getchr()) != EOF && c != '\n') ;
if (c == EOF)
{ if (something_read)
rawSyntaxError("Unexpected end of file")
else
goto e_o_f;
}
goto case_default; /* Hack */
case '\'': if ( rb.here > rb.base && isDigit(rb.here[-1]) )
{ addToBuffer(c); /* <n>' */
if ( rb.here[-2] == '0' ) /* 0'<c> */
{ if ( (c=getchr()) != EOF )
{ addToBuffer(c);
break;
}
rawSyntaxError("Unexpected end of file");
}
break;
}
set_start_line;
newlines = 0;
addToBuffer(c);
while((c=getchr()) != EOF && c != '\'')
{ if (c == '\n' &&
newlines++ > MAXNEWLINES &&
(debugstatus.styleCheck & LONGATOM_CHECK))
rawSyntaxError("Atom too long");
addToBuffer(c);
}
if (c == EOF)
rawSyntaxError("End of file in quoted atom");
addToBuffer(c);
break;
case '"': set_start_line;
newlines = 0;
addToBuffer(c);
while((c=getchr()) != EOF && c != '"')
{ if (c == '\n' &&
newlines++ > MAXNEWLINES &&
(debugstatus.styleCheck & LONGATOM_CHECK))
rawSyntaxError("String too long");
addToBuffer(c);
}
if (c == EOF)
rawSyntaxError("End of file in string");
addToBuffer(c);
break;
#if O_EXTEND_ATOMS
case ESC: if ( rb.doExtend == TRUE )
{ char *a;
char *extend;
bool unique;
addToBuffer(EOS); /* allocates if need be */
delBuffer();
a = rb.here - 1;
for( ;a >= rb.base && isAlpha(*a); a--) ;
a++;
if ( a >= rb.here || !isLower(*a) )
{ extendDeleteEscape();
extendReprint(FALSE);
extendBeep();
break;
}
if ( (extend = extendAtom(a, &unique)) != (char *)NULL )
{ ttybuf buf;
extendDeleteEscape();
extendReprint(FALSE);
PushTty(&buf, TTY_APPEND);
while(*extend)
PretendTyped(*extend++);
PopTty(&buf);
if ( unique == FALSE )
extendBeep();
} else
{ extendDeleteEscape();
extendReprint(FALSE);
extendBeep();
}
break;
}
/*FALLTHROUGH*/
#endif /* O_EXTEND_ATOMS */
case_default: /* Hack, needs fixing */
default: if ( isBlank(c) )
{ long rd;
rd = rb.here - rb.base;
if (rd == 1 && rb.here[-1] == '.')
rawSyntaxError("Unexpected end of clause");
if (rd >= 2)
{ if ( rb.here[-1] == '.' &&
!isSymbol(rb.here[-2]) &&
!(rb.here[-2] == '\'' && rd >= 3 && rb.here[-3] == '0'))
{ ensure_space(c);
addToBuffer(EOS);
return rb.base;
}
}
ensure_space(c);
if ( c == '\n' )
prompt(TRUE);
} else
{ addToBuffer(c);
if ( c != '/' ) /* watch comment start */
set_start_line;
}
break;
}
}
}
static char *
raw_read()
{ char *s;
startRead();
#if O_EXTEND_ATOMS
if ( Input == 0 && status.notty == FALSE )
{ ttybuf buf;
PushTty(&buf, TTY_EXTEND_ATOMS);
s = raw_read2();
PopTty(&buf);
} else
#endif
s = raw_read2();
stopRead();
return s;
}
/*********************************
* VARIABLE DATABASE *
**********************************/
#define VARHASHSIZE 16
#define MAX_SINGLETONS 250
static char * allocBase; /* local allocation base */
#if !O_DYNAMIC_STACKS
static char * allocTop; /* top of allocation */
#endif
static Variable* varTable; /* hashTable for variables */
forwards void check_singletons P((void));
forwards bool bind_variables P((Word));
forwards char * alloc_var P((size_t));
forwards char * save_var_name P((char *));
forwards Variable lookupVariable P((char *));
forwards void initVarTable P((void));
static void
check_singletons()
{ register Variable var;
int n;
Atom singletons[MAX_SINGLETONS];
int i=0;
for(n=0; n<VARHASHSIZE; n++)
{ for(var = varTable[n]; var; var=var->next)
{ if (var->times == 1 && var->name[0] != '_' && i < MAX_SINGLETONS)
singletons[i++] = lookupAtom(var->name);
}
}
if ( i > 0 )
singletonWarning(singletons, i);
}
/* construct a list of Var = <name> terms wich indicate the bindings
of the variables. Anonymous variables are skipped. The result is
unified with the argument.
** Sat Apr 16 23:09:04 1988 jan@swivax.UUCP (Jan Wielemaker) */
static bool
bind_variables(bindings)
Word bindings;
{ Variable var;
int n;
word binding;
Word arg;
for(n=0; n<VARHASHSIZE; n++)
{ for(var = varTable[n]; var; var=var->next)
{ if (var->name[0] != '_')
{ binding = globalFunctor(FUNCTOR_equals2);
arg = argTermP(binding, 0);
*arg++ = (word) lookupAtom(var->name);
*arg = makeRef(var->address);
APPENDLIST(bindings, &binding);
}
}
}
CLOSELIST(bindings);
succeed;
}
static char *
alloc_var(n)
register size_t n;
{ register char *space;
n = ROUND(n, sizeof(word));
STACKVERIFY(if (allocBase + n > allocTop) outOf((Stack)&stacks.local) );
space = allocBase;
allocBase += n;
return space;
}
static char *
save_var_name(s)
char *s;
{ char *copy = alloc_var(strlen(s) + 1);
strcpy(copy, s);
return copy;
}
static Variable
lookupVariable(s)
char *s;
{ int v = stringHashValue(s, VARHASHSIZE);
Variable var;
for(var=varTable[v]; var; var=var->next)
{ if (streq(s, var->name) )
{ var->times++;
return var;
}
}
var = (Variable) alloc_var((size_t) sizeof(struct variable));
DEBUG(9, printf("Allocated var at %ld\n", var));
var->next = varTable[v];
varTable[v] = var;
var->name = save_var_name(s);
var->times = 1;
var->address = (Word) NULL;
return var;
}
static void
initVarTable()
{ int n;
allocBase = (char *)(lTop+1) + (MAXARITY+MAXVARIABLES) * sizeof(word);
#if !O_DYNAMIC_STACKS
allocTop = (char *)lMax;
#endif
varTable = (Variable *)alloc_var((size_t) sizeof(Variable)*VARHASHSIZE);
for(n=0; n<VARHASHSIZE; n++)
varTable[n] = (Variable) NULL;
}
/********************************
* TOKENISER *
*********************************/
#define skipSpaces { while(isBlank(*here) ) here++; c = *here++; }
#define unget_token() { unget = TRUE; }
forwards Token get_token P((bool));
forwards word build_term P((Atom, int, Word));
forwards bool complex_term P((char *, Word));
forwards bool simple_term P((bool, Word, bool *));
forwards bool read_term P((Word, Word, bool));
typedef union
{ long i;
real r;
} number;
#define V_ERROR 0
#define V_REAL 1
#define V_INT 2
forwards int scan_number P((char **, int, number *));
static int
scan_number(s, b, n)
char **s;
int b;
number *n;
{ int d;
n->i = 0;
while((d = digitValue(b, **s)) >= 0)
{ (*s)++;
n->i = n->i * b + d;
if ( n->i > PLMAXINT )
{ n->r = (real) n->i;
while((d = digitValue(b, **s)) >= 0)
{ (*s)++;
if ( n->r > MAXREAL / (real) b - (real) d )
return V_ERROR;
n->r = n->r * (real)b + (real)d;
}
return V_REAL;
}
}
return V_INT;
}
static Token
get_token(must_be_op)
bool must_be_op;
{ char c;
char *start;
char end;
int negative = 1;
if (unget)
{ unget = FALSE;
return &token;
}
skipSpaces;
token_start = here - 1;
switch(char_type[(unsigned)c & 0xff])
{ case LC: { start = here-1;
while(isAlpha(*here) )
here++;
c = *here;
*here = EOS;
token.value.prolog = (word)lookupAtom(start);
*here = c;
token.type = (c == '(' ? T_FUNCTOR : T_NAME);
DEBUG(9, printf("%s: %s\n", c == '(' ? "FUNC" : "NAME", stringAtom(token.value.prolog)));
return &token;
}
case UC: { start = here-1;
while(isAlpha(*here) )
here++;
c = *here;
*here = EOS;
if (start[0] == '_' && here == start + 1)
{ setVar(token.value.prolog);
DEBUG(9, printf("VOID\n"));
token.type = T_VOID;
} else
{ token.value.variable = lookupVariable(start);
DEBUG(9, printf("VAR: %s\n", token.value.variable->name));
token.type = T_VARIABLE;
}
*here = c;
return &token;
}
case_digit:
case DI: { number value;
int tp;
if (c == '0' && *here == '\'') /* 0'<char> */
{ if (isAlpha(here[2]))
{ here += 2;
syntaxError("Illegal number");
}
token.value.prolog = consNum((long)here[1] * negative);
token.type = T_INTEGER;
here += 2;
DEBUG(9, printf("INT: %ld\n", valNum(token.value.prolog)));
return &token;
}
here--; /* start of token */
if ( (tp = scan_number(&here, 10, &value)) == V_ERROR )
syntaxError("Number too large");
/* base'value number */
if ( *here == '\'' )
{ here++;
if ( tp == V_REAL || value.i > 36 )
syntaxError("Base of <base>'<value> too large");
if ( (tp = scan_number(&here, (int)value.i, &value))
== V_ERROR )
syntaxError("Number too large");
if (isAlpha(*here) )
syntaxError("Illegal number");
if ( tp == V_INT )
{ token.value.prolog = consNum(value.i * negative);
token.type = T_INTEGER;
} else
{ token.value.prolog = globalReal(value.r * negative);
token.type = T_REAL;
}
return &token;
}
/* Real numbers */
if ( *here == '.' && isDigit(here[1]) )
{ real n;
if ( tp == V_INT )
{ value.r = (real) value.i;
tp = V_REAL;
}
n = 10.0, here++;
while(isDigit(c = *here) )
{ here++;
value.r += (real)(c-'0') / n;
n *= 10.0;
}
}
if ( *here == 'e' || c == 'E' )
{ number exponent;
bool neg_exponent;
here++;
DEBUG(9, printf("Exponent\n"));
switch(*here)
{ case '-': here++;
neg_exponent = TRUE;
break;
case '+': here++;
default: neg_exponent = FALSE;
break;
}
if ( scan_number(&here, 10, &exponent) != V_INT )
syntaxError("Exponent too large");
if ( tp == V_INT )
{ value.r = (real) value.i;
tp = V_REAL;
}
value.r *= pow((double)10.0,
neg_exponent ? -(double)exponent.i
: (double)exponent.i);
}
if ( isAlpha(c = *here) )
syntaxError("Illegal number");
if ( tp == V_REAL )
{ token.value.prolog = globalReal(value.r * negative);
token.type = T_REAL;
} else
{ token.value.prolog = consNum(value.i * negative);
token.type = T_INTEGER;
}
return &token;
}
case SO: { char tmp[2];
tmp[0] = c, tmp[1] = EOS;
token.value.prolog = (word) lookupAtom(tmp);
token.type = T_NAME;
DEBUG(9, printf("NAME: %s\n", stringAtom(token.value.prolog)));
return &token;
}
case SY: { if (c == '.' && isBlank(here[0]))
{ token.type = T_FULLSTOP;
return &token;
}
start = here - 1;
while( isSymbol(*here) )
here++;
end = *here, *here = EOS;
token.value.prolog = (word) lookupAtom(start);
*here = end;
if ( !must_be_op && isDigit(end) ) /* +- <number> case */
{ if ( token.value.prolog == (word) ATOM_minus )
{ negative = -1;
c = *here++;
goto case_digit;
} else if ( token.value.prolog == (word) ATOM_plus )
{ c = *here++;
goto case_digit;
}
}
token.type = (end == '(' ? T_FUNCTOR : T_NAME);
DEBUG(9, printf("%s: %s\n", end == '(' ? "FUNC" : "NAME", stringAtom(token.value.prolog)));
return &token;
}
case PU: { switch(c)
{ case '{':
case '[': while(isBlank(*here) )
here++;
if (here[0] == matchingBracket(c))
{ here++;
token.value.prolog =
(word)(c == '[' ? ATOM_nil : ATOM_curl);
token.type = T_NAME;
DEBUG(9, printf("NAME: %s\n", stringAtom(token.value.prolog)));
return &token;
}
}
token.value.character = c;
token.type = T_PUNCTUATION;
DEBUG(9, printf("PUNCT: %c\n", token.value.character));
return &token;
}
case SQ: { char *s;
start = here;
for(s=start;;)
{ if (*here == '\'')
{ if (here[1] != '\'')
{ end = *s, *s = EOS;
token.value.prolog = (word) lookupAtom(start);
*s = end;
token.type = (here[1] == '(' ? T_FUNCTOR : T_NAME);
here++;
DEBUG(9, printf("%s: %s\n", here[1] == '(' ? "FUNC" : "NAME", stringAtom(token.value.prolog)));
return &token;
}
here++;
}
*s++ = *here++;
}
}
case DQ: { char *s;
start = here;
for(s=start;;)
{ if (*here == '"')
{ if (here[1] != '"')
{ end = *s, *s = EOS;
#if O_STRING
if ( debugstatus.styleCheck & O_STRING_STYLE )
token.value.prolog = globalString(start);
else
token.value.prolog = (word) stringToList(start);
#else
token.value.prolog = (word) stringToList(start);
#endif /* O_STRING */
DEBUG(9, printf("STR: %s\n", start));
*s = end;
token.type = T_STRING;
here++;
return &token;
}
here++;
}
*s++ = *here++;
}
}
default: { sysError("read/1: tokeniser internal error");
return &token; /* make lint happy */
}
}
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Build a term on the global stack, given the atom of the functor, the
arity and a vector of arguments. The argument vector either contains
nonvar terms or a reference to a variable block.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static word
build_term(atom, arity, argv)
Atom atom;
int arity;
Word argv;
{ FunctorDef functor = lookupFunctorDef(atom, arity);
word term;
Word argp;
DEBUG(9, printf("Building term %s/%d ... ", stringAtom(atom), arity));
term = globalFunctor(functor);
argp = argTermP(term, 0);
while(arity-- > 0)
{ if (isRef(*argv) )
{ Variable var;
#if O_NO_LEFT_CAST
Word w;
deRef2(argv, w);
var = (Variable) w;
#else
deRef2(argv, (Word)var);
#endif
if (var->address == (Word) NULL)
var->address = argp++;
else
*argp++ = makeRef(var->address);
argv++;
} else
*argp++ = *argv++;
}
DEBUG(9, printf("result: "); pl_write(&term); printf("\n") );
return term;
}
/********************************
* PARSER *
*********************************/
#define priorityClash { syntaxError("Operator priority clash"); }
#define MAX_TERM_NESTING 200
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
This part of the parser actually constructs the term. It calls the
tokeniser to find the next token and assumes the tokeniser implements
one-token pushback efficiently. It consists of two mutual recursive
functions: complex_term() which is involved with operator handling and
simple_term() which reads everything, except for operators.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
typedef struct
{ Atom op;
int kind;
int left_pri;
int right_pri;
int op_pri;
} op_entry;
static bool
isOp(atom, kind, e)
Atom atom;
int kind;
op_entry *e;
{ Operator op = isCurrentOperator(atom, kind);
int pri;
if ( op == NULL )
fail;
e->op = atom;
e->kind = kind;
e->op_pri = pri = op->priority;
switch(op->type)
{ case OP_FX: e->left_pri = 0; e->right_pri = pri-1; break;
case OP_FY: e->left_pri = 0; e->right_pri = pri; break;
case OP_XF: e->left_pri = pri-1; e->right_pri = 0; break;
case OP_YF: e->left_pri = pri; e->right_pri = 0; break;
case OP_XFX: e->left_pri = pri-1; e->right_pri = pri-1; break;
case OP_XFY: e->left_pri = pri-1; e->right_pri = pri; break;
case OP_YFX: e->left_pri = pri; e->right_pri = pri-1; break;
case OP_YFY: e->left_pri = pri; e->right_pri = pri; break;
}
succeed;
}
#define PushOp() \
side[side_n++] = in_op; \
if ( side_n >= MAX_TERM_NESTING ) \
syntaxError("Operator stack overflow"); \
side_p = (side_n == 1 ? side : side_p+1);
#define Modify(pri) \
if ( side_p != NULL && pri > side_p->right_pri ) \
{ if ( side_p->kind == OP_PREFIX && rmo == 0 ) \
{ DEBUG(1, printf("Prefix %s to atom\n", stringAtom(side_p->op))); \
rmo++; \
out[out_n++] = (word) side_p->op; \
side_n--; \
side_p = (side_n == 0 ? NULL : side_p-1); \
} else if ( side_p->kind == OP_INFIX && out_n > 0 && rmo == 0 && \
isOp(side_p->op, OP_POSTFIX, side_p) ) \
{ DEBUG(1, printf("Infix %s to postfix\n", stringAtom(side_p->op)));\
rmo++; \
out[out_n-1] = build_term(side_p->op, 1, &out[out_n-1]); \
side_n--; \
side_p = (side_n == 0 ? NULL : side_p-1); \
} \
}
#define Reduce(cond) \
while( out_n > 0 && side_p != NULL && (cond) ) \
{ int arity = (side_p->kind == OP_INFIX ? 2 : 1); \
\
DEBUG(1, printf("Reducing %s/%d\n", stringAtom(side_p->op), arity));\
out[out_n-arity] = build_term(side_p->op, \
arity, \
&out[out_n - arity]); \
out_n -= (arity-1); \
side_n--; \
side_p = (side_n == 0 ? NULL : side_p-1); \
}
static bool
complex_term(stop, term)
char *stop;
Word term;
{ word out[MAX_TERM_NESTING];
op_entry in_op, side[MAX_TERM_NESTING];
int out_n = 0, side_n = 0;
int rmo = 0; /* Rands more than operators */
op_entry *side_p = NULL;
for(;;)
{ bool isname;
Token token;
word in;
if ( out_n != 0 || side_n != 0 ) /* Check for end of term */
{ if ( (token = get_token(rmo == 1)) == (Token) NULL )
fail;
unget_token(); /* only look-ahead! */
switch(token->type)
{ case T_FULLSTOP:
if ( stop == NULL )
goto exit;
break;
case T_PUNCTUATION:
{ if ( stop != NULL && index(stop, token->value.character) )
goto exit;
}
}
}
/* Read `simple' term */
TRY( simple_term(rmo == 1, &in, &isname) );
if ( isname ) /* Check for operators */
{ if ( rmo == 1 && isOp((Atom) in, OP_INFIX, &in_op) )
{ DEBUG(1, printf("Infix op: %s\n", stringAtom((Atom) in)));
Modify(in_op.left_pri);
Reduce(in_op.left_pri > side_p->right_pri);
PushOp();
rmo--;
continue;
}
if ( rmo == 1 && isOp((Atom) in, OP_POSTFIX, &in_op) )
{ DEBUG(1, printf("Postfix op: %s\n", stringAtom((Atom) in)));
Modify(in_op.left_pri);
Reduce(in_op.left_pri > side_p->right_pri);
PushOp();
continue;
}
if ( rmo == 0 && isOp((Atom) in, OP_PREFIX, &in_op) )
{ DEBUG(1, printf("Prefix op: %s\n", stringAtom((Atom) in)));
Reduce(in_op.left_pri > side_p->right_pri);
PushOp();
continue;
}
}
if ( rmo != 0 )
syntaxError("Operator expected");
rmo++;
out[out_n++] = in;
if ( out_n >= MAX_TERM_NESTING )
syntaxError("Operant stack overflow");
}
exit:
Modify(1000000);
Reduce(TRUE);
if ( out_n == 1 && side_n == 0 ) /* simple term */
{ *term = out[0];
succeed;
}
if ( out_n == 0 && side_n == 1 ) /* single operator */
{ *term = (word) side[0].op;
succeed;
}
syntaxError("Unbalanced operators");
}
static bool
simple_term(must_be_op, term, name)
bool must_be_op;
Word term;
bool *name;
{ Token token;
DEBUG(9, printf("simple_term(): Stack at %ld\n", &term));
*name = FALSE;
if ( (token = get_token(must_be_op)) == NULL )
fail;
switch(token->type)
{ case T_FULLSTOP:
syntaxError("Unexpected end of clause");
case T_VOID:
{ *term = token->value.prolog;
succeed;
}
case T_VARIABLE:
{ *term = makeRef(token->value.variable);
succeed;
}
case T_NAME:
*name = TRUE;
case T_REAL:
case T_INTEGER:
case T_STRING:
{ *term = token->value.prolog;
succeed;
}
case T_FUNCTOR:
{ if ( must_be_op )
{ *name = TRUE;
*term = token->value.prolog;
} else
{ word argv[MAXARITY+1];
int argc;
Word argp;
word functor;
functor = token->value.prolog;
argc = 0, argp = argv;
get_token(must_be_op); /* skip '(' */
do
{ TRY( complex_term(",)", argp++) );
if (++argc > MAXARITY)
syntaxError("Arity too high");
token = get_token(must_be_op); /* `,' or `)' */
} while(token->value.character == ',');
*term = build_term((Atom)functor, argc, argv);
}
succeed;
}
case T_PUNCTUATION:
{ switch(token->value.character)
{ case '(':
{ word arg;
TRY( complex_term(")", &arg) );
token = get_token(must_be_op); /* skip ')' */
*term = arg;
succeed;
}
case '{':
{ word arg;
TRY( complex_term("}", &arg) );
token = get_token(must_be_op);
*term = build_term(ATOM_curl, 1, &arg);
succeed;
}
case '[':
{ Word tail = term;
word arg[2];
Atom dot = ATOM_dot;
for(;;)
{ TRY( complex_term(",|]", &arg[0]) );
arg[1] = (word) NULL;
*tail = build_term(dot, 2, arg);
tail = argTermP(*tail, 1);
token = get_token(must_be_op);
switch(token->value.character)
{ case ']':
{ *tail = (word) ATOM_nil;
succeed;
}
case '|':
{ TRY( complex_term("]", &arg[0]) );
if (isRef(arg[0]))
{ Variable var;
#if O_NO_LEFT_CAST
Word w;
deRef2(&arg[0], w);
var = (Variable) w;
#else
deRef2(&arg[0], (Word)var);
#endif
if (var->address == (Word) NULL)
var->address = tail;
else
*tail = makeRef(var->address);
} else
*tail = arg[0];
token = get_token(must_be_op);
succeed;
}
case ',':
continue;
}
}
}
case '|':
case ',':
case ')':
case '}':
case ']':
default:
{ char tmp[2];
*name = TRUE;
tmp[0] = token->value.character;
tmp[1] = EOS;
*term = (word) lookupAtom(tmp);
succeed;
}
}
} /* case T_PUNCTUATION */
default:;
sysError("read/1: Illegal token type (%d)", token->type);
/*NOTREACHED*/
fail;
}
}
static bool
read_term(term, variables, check)
Word term, variables;
bool check;
{ Token token;
word result;
if ((base = raw_read()) == (char *) NULL)
fail;
initVarTable();
here = base;
unget = FALSE;
TRY( complex_term(NULL, &result) );
if ((token = get_token(FALSE)) == (Token) NULL)
fail;
if (token->type != T_FULLSTOP)
syntaxError("End of clause expected");
if ( isRef(result) ) /* term is a single variable! */
{ Variable var;
#if O_NO_LEFT_CAST
Word w;
deRef2(&result, w);
var = (Variable) w;
#else
deRef2(&result, (Word)var);
#endif
if ( var->times != 1 || var->address != (Word)NULL )
sysError("Error while reading a single variable??");
var->address = allocGlobal(sizeof(word));
setVar(*var->address);
result = makeRef(var->address);
}
TRY(pl_unify(term, &result) );
if (variables != (Word) NULL)
TRY(bind_variables(variables) );
if (check)
check_singletons();
succeed;
}
/********************************
* PROLOG CONNECTION *
*********************************/
word
pl_raw_read(term)
Word term;
{ char *s;
register char *top;
s = raw_read();
if ( s == (char *) NULL )
fail;
for(top = s+strlen(s)-1; isBlank(*top); top--);
if (*top == '.')
*top = EOS;
for(; isBlank(*s); s++);
return unifyAtomic(term, lookupAtom(s) );
}
word
pl_read_variables(term, variables)
Word term, variables;
{ return read_term(term, variables, FALSE);
}
word
pl_read_variables3(stream, term, variables)
Word stream, term, variables;
{ streamInput(stream, pl_read_variables(term, variables));
}
word
pl_read(term)
Word term;
{ return read_term(term, (Word)NULL, FALSE);
}
word
pl_read2(stream, term)
Word stream, term;
{ streamInput(stream, pl_read(term));
}
word
pl_read_clause(term)
Word term;
{ return read_term(term, (Word) NULL,
debugstatus.styleCheck & SINGLETON_CHECK ? TRUE : FALSE);
}
word
pl_read_clause2(stream, term)
Word stream, term;
{ streamInput(stream, pl_read_clause(term));
}