home *** CD-ROM | disk | FTP | other *** search
- #include "stralloc.h"
- #include "alloc.h"
- #include "str.h"
- #include "token822.h"
- #include "gen_allocdefs.h"
-
- static struct token822 comma = { TOKEN822_COMMA };
-
- void token822_reverse(ta)
- token822_alloc *ta;
- {
- int i;
- int n;
- struct token822 temp;
-
- n = ta->len - 1;
- for (i = 0;i + i < n;++i)
- {
- temp = ta->t[i];
- ta->t[i] = ta->t[n - i];
- ta->t[n - i] = temp;
- }
- }
-
- GEN_ALLOC_ready(token822_alloc,struct token822,t,len,a,i,n,x,30,token822_ready)
- GEN_ALLOC_readyplus(token822_alloc,struct token822,t,len,a,i,n,x,30,token822_readyplus)
- GEN_ALLOC_append(token822_alloc,struct token822,t,len,a,i,n,x,30,token822_readyplus,token822_append)
-
- static int needspace(t1,t2)
- int t1;
- int t2;
- {
- if (!t1) return 0;
- if (t1 == TOKEN822_COLON) return 1;
- if (t1 == TOKEN822_COMMA) return 1;
- if (t2 == TOKEN822_LEFT) return 1;
- switch(t1)
- {
- case TOKEN822_ATOM: case TOKEN822_LITERAL:
- case TOKEN822_QUOTE: case TOKEN822_COMMENT:
- switch(t2)
- {
- case TOKEN822_ATOM: case TOKEN822_LITERAL:
- case TOKEN822_QUOTE: case TOKEN822_COMMENT:
- return 1;
- }
- }
- return 0;
- }
-
/*
 * atomok: report whether ch may continue an atom while token822_parse() is
 * scanning one.
 *
 * Returns 0 for whitespace (space, tab, CR, LF), for the characters that
 * open a comment, literal or quoted string ('(', '[', '"') and for the
 * single-character specials '<' '>' ';' ':' '@' ',' '.'; returns 1 for
 * every other character.
 *
 * Note that ')', ']' and '\\' are accepted here; atomcheck() later turns an
 * atom containing them into a quoted string.
 */
static int atomok(char ch)
{
  switch (ch)
  {
    case ' ': case '\t': case '\r': case '\n':
    case '(': case '[': case '"':
    case '<': case '>': case ';': case ':':
    case '@': case ',': case '.':
      return 0;
    default:
      return 1;
  }
}
-
- static void atomcheck(t)
- struct token822 *t;
- {
- int i;
- char ch;
- for (i = 0;i < t->slen;++i)
- {
- ch = t->s[i];
- if ((ch < 32) || (ch > 126) || (ch == ')') || (ch == ']') || (ch == '\\'))
- {
- t->type = TOKEN822_QUOTE;
- return;
- }
- }
- }
-
- int token822_unparse(sa,ta,linelen)
- stralloc *sa;
- token822_alloc *ta;
- unsigned int linelen;
- {
- struct token822 *t;
- int len;
- int ch;
- int i;
- int j;
- int lasttype;
- int newtype;
- char *s;
- char *lineb;
- char *linee;
-
- len = 0;
- lasttype = 0;
- for (i = 0;i < ta->len;++i)
- {
- t = ta->t + i;
- newtype = t->type;
- if (needspace(lasttype,newtype))
- ++len;
- lasttype = newtype;
- switch(newtype)
- {
- case TOKEN822_COMMA:
- len += 3; break;
- case TOKEN822_AT: case TOKEN822_DOT: case TOKEN822_LEFT: case TOKEN822_RIGHT:
- case TOKEN822_SEMI: case TOKEN822_COLON:
- ++len; break;
- case TOKEN822_ATOM: case TOKEN822_QUOTE: case TOKEN822_LITERAL: case TOKEN822_COMMENT:
- if (t->type != TOKEN822_ATOM) len += 2;
- for (j = 0;j < t->slen;++j)
- switch(ch = t->s[j])
- {
- case '"': case '[': case ']': case '(': case ')':
- case '\\': case '\r': case '\n': ++len;
- default: ++len;
- }
- break;
- }
- }
- len += 2;
-
- if (!stralloc_ready(sa,len))
- return -1;
-
- s = sa->s;
- lineb = s;
- linee = 0;
-
- lasttype = 0;
- for (i = 0;i < ta->len;++i)
- {
- t = ta->t + i;
- newtype = t->type;
- if (needspace(lasttype,newtype))
- *s++ = ' ';
- lasttype = newtype;
- switch(newtype)
- {
- case TOKEN822_COMMA:
- *s++ = ',';
- #define NSUW \
- s[0] = '\n'; s[1] = ' '; \
- if (linee && (!linelen || (s - lineb <= linelen))) \
- { while (linee < s) { linee[0] = linee[2]; ++linee; } linee -= 2; } \
- else { if (linee) lineb = linee + 1; linee = s; s += 2; }
- NSUW
- break;
- case TOKEN822_AT: *s++ = '@'; break;
- case TOKEN822_DOT: *s++ = '.'; break;
- case TOKEN822_LEFT: *s++ = '<'; break;
- case TOKEN822_RIGHT: *s++ = '>'; break;
- case TOKEN822_SEMI: *s++ = ';'; break;
- case TOKEN822_COLON: *s++ = ':'; break;
- case TOKEN822_ATOM: case TOKEN822_QUOTE: case TOKEN822_LITERAL: case TOKEN822_COMMENT:
- if (t->type == TOKEN822_QUOTE) *s++ = '"';
- if (t->type == TOKEN822_LITERAL) *s++ = '[';
- if (t->type == TOKEN822_COMMENT) *s++ = '(';
- for (j = 0;j < t->slen;++j)
- switch(ch = t->s[j])
- {
- case '"': case '[': case ']': case '(': case ')':
- case '\\': case '\r': case '\n': *s++ = '\\';
- default: *s++ = ch;
- }
- if (t->type == TOKEN822_QUOTE) *s++ = '"';
- if (t->type == TOKEN822_LITERAL) *s++ = ']';
- if (t->type == TOKEN822_COMMENT) *s++ = ')';
- break;
- }
- }
- NSUW
- --s;
- sa->len = s - sa->s;
- return 1;
- }
-
- int token822_unquote(sa,ta)
- stralloc *sa;
- token822_alloc *ta;
- {
- struct token822 *t;
- int len;
- int i;
- int j;
- char *s;
-
- len = 0;
- for (i = 0;i < ta->len;++i)
- {
- t = ta->t + i;
- switch(t->type)
- {
- case TOKEN822_COMMA: case TOKEN822_AT: case TOKEN822_DOT: case TOKEN822_LEFT:
- case TOKEN822_RIGHT: case TOKEN822_SEMI: case TOKEN822_COLON:
- ++len; break;
- case TOKEN822_LITERAL:
- len += 2;
- case TOKEN822_ATOM: case TOKEN822_QUOTE:
- len += t->slen;
- }
- }
-
- if (!stralloc_ready(sa,len))
- return -1;
-
- s = sa->s;
-
- for (i = 0;i < ta->len;++i)
- {
- t = ta->t + i;
- switch(t->type)
- {
- case TOKEN822_COMMA: *s++ = ','; break;
- case TOKEN822_AT: *s++ = '@'; break;
- case TOKEN822_DOT: *s++ = '.'; break;
- case TOKEN822_LEFT: *s++ = '<'; break;
- case TOKEN822_RIGHT: *s++ = '>'; break;
- case TOKEN822_SEMI: *s++ = ';'; break;
- case TOKEN822_COLON: *s++ = ':'; break;
- case TOKEN822_ATOM: case TOKEN822_QUOTE: case TOKEN822_LITERAL:
- if (t->type == TOKEN822_LITERAL) *s++ = '[';
- for (j = 0;j < t->slen;++j)
- *s++ = t->s[j];
- if (t->type == TOKEN822_LITERAL) *s++ = ']';
- break;
- case TOKEN822_COMMENT: break;
- }
- }
- sa->len = s - sa->s;
- return 1;
- }
-
- int token822_parse(ta,sa,buf)
- token822_alloc *ta;
- stralloc *sa;
- stralloc *buf;
- {
- int i;
- int salen;
- int level;
- struct token822 *t;
- int numtoks;
- int numchars;
- char *cbuf;
-
- salen = sa->len;
-
- numchars = 0;
- numtoks = 0;
- for (i = 0;i < salen;++i)
- switch(sa->s[i])
- {
- case '.': case ',': case '@': case '<': case '>': case ':': case ';':
- ++numtoks; break;
- case ' ': case '\t': case '\r': case '\n': break;
- case ')': case ']': return 0;
- /* other control chars and non-ASCII chars are also bad, in theory */
- case '(':
- level = 1;
- while (level)
- {
- if (++i >= salen) return 0;
- switch(sa->s[i])
- {
- case '(': ++level; break;
- case ')': --level; break;
- case '\\': if (++i >= salen) return 0;
- default: ++numchars;
- }
- }
- ++numtoks;
- break;
- case '"':
- level = 1;
- while (level)
- {
- if (++i >= salen) return 0;
- switch(sa->s[i])
- {
- case '"': --level; break;
- case '\\': if (++i >= salen) return 0;
- default: ++numchars;
- }
- }
- ++numtoks;
- break;
- case '[':
- level = 1;
- while (level)
- {
- if (++i >= salen) return 0;
- switch(sa->s[i])
- {
- case ']': --level; break;
- case '\\': if (++i >= salen) return 0;
- default: ++numchars;
- }
- }
- ++numtoks;
- break;
- default:
- do
- {
- ++numchars;
- if (++i >= salen)
- break;
- }
- while (atomok(sa->s[i]));
- --i;
- ++numtoks;
- }
-
- if (!token822_ready(ta,numtoks))
- return -1;
- if (!stralloc_ready(buf,numchars))
- return -1;
- cbuf = buf->s;
- ta->len = numtoks;
-
- t = ta->t;
- for (i = 0;i < salen;++i)
- switch(sa->s[i])
- {
- case '.': t->type = TOKEN822_DOT; ++t; break;
- case ',': t->type = TOKEN822_COMMA; ++t; break;
- case '@': t->type = TOKEN822_AT; ++t; break;
- case '<': t->type = TOKEN822_LEFT; ++t; break;
- case '>': t->type = TOKEN822_RIGHT; ++t; break;
- case ':': t->type = TOKEN822_COLON; ++t; break;
- case ';': t->type = TOKEN822_SEMI; ++t; break;
- case ' ': case '\t': case '\r': case '\n': break;
- case '(':
- t->type = TOKEN822_COMMENT; t->s = cbuf; t->slen = 0;
- level = 1;
- while (level)
- {
- ++i; /* assert: < salen */
- switch(sa->s[i])
- {
- case '(': ++level; break;
- case ')': --level; break;
- case '\\': ++i; /* assert: < salen */
- default: *cbuf++ = sa->s[i]; ++t->slen;
- }
- }
- ++t;
- break;
- case '"':
- t->type = TOKEN822_QUOTE; t->s = cbuf; t->slen = 0;
- level = 1;
- while (level)
- {
- ++i; /* assert: < salen */
- switch(sa->s[i])
- {
- case '"': --level; break;
- case '\\': ++i; /* assert: < salen */
- default: *cbuf++ = sa->s[i]; ++t->slen;
- }
- }
- ++t;
- break;
- case '[':
- t->type = TOKEN822_LITERAL; t->s = cbuf; t->slen = 0;
- level = 1;
- while (level)
- {
- ++i; /* assert: < salen */
- switch(sa->s[i])
- {
- case ']': --level; break;
- case '\\': ++i; /* assert: < salen */
- default: *cbuf++ = sa->s[i]; ++t->slen;
- }
- }
- ++t;
- break;
- default:
- t->type = TOKEN822_ATOM; t->s = cbuf; t->slen = 0;
- do
- {
- *cbuf++ = sa->s[i]; ++t->slen;
- if (++i >= salen)
- break;
- }
- while (atomok(sa->s[i]));
- atomcheck(t);
- --i;
- ++t;
- }
- return 1;
- }
-
- static int gotaddr(taout,taaddr,callback)
- token822_alloc *taout;
- token822_alloc *taaddr;
- int (*callback)();
- {
- int i;
-
- if (callback(taaddr) != 1)
- return 0;
-
- if (!token822_readyplus(taout,taaddr->len))
- return 0;
-
- for (i = 0;i < taaddr->len;++i)
- taout->t[taout->len++] = taaddr->t[i];
-
- taaddr->len = 0;
- return 1;
- }
-
- int token822_addrlist(taout,taaddr,ta,callback)
- token822_alloc *taout;
- token822_alloc *taaddr;
- token822_alloc *ta;
- int (*callback)();
- {
- struct token822 *t;
- struct token822 *beginning;
- int ingroup;
- int wordok;
-
- taout->len = 0;
- taaddr->len = 0;
-
- if (!token822_readyplus(taout,1)) return -1;
- if (!token822_readyplus(taaddr,1)) return -1;
-
- ingroup = 0;
- wordok = 1;
-
- beginning = ta->t + 2;
- t = ta->t + ta->len - 1;
-
- /* rfc 822 address lists are easy to parse from right to left */
-
- #define FLUSH if (taaddr->len) if (!gotaddr(taout,taaddr,callback)) return -1;
- #define FLUSHCOMMA if (taaddr->len) { \
- if (!gotaddr(taout,taaddr,callback)) return -1; \
- if (!token822_append(taout,&comma)) return -1; }
- #define ADDRLEFT if (!token822_append(taaddr,t--)) return -1;
- #define OUTLEFT if (!token822_append(taout,t--)) return -1;
-
- while (t >= beginning)
- {
- switch(t->type)
- {
- case TOKEN822_SEMI:
- FLUSHCOMMA
- if (ingroup) return 0;
- ingroup = 1;
- wordok = 1;
- break;
- case TOKEN822_COLON:
- FLUSH
- if (!ingroup) return 0;
- ingroup = 0;
- while ((t >= beginning) && (t->type != TOKEN822_COMMA))
- OUTLEFT
- if (t >= beginning)
- OUTLEFT
- wordok = 1;
- continue;
- case TOKEN822_RIGHT:
- FLUSHCOMMA
- OUTLEFT
- while ((t >= beginning) && (t->type != TOKEN822_LEFT))
- ADDRLEFT
- /* important to use address here even if it's empty: <> */
- if (!gotaddr(taout,taaddr,callback)) return -1;
- if (t < beginning) return 0;
- OUTLEFT
- while ((t >= beginning) && ((t->type == TOKEN822_COMMENT) || (t->type == TOKEN822_ATOM) || (t->type == TOKEN822_QUOTE) || (t->type == TOKEN822_AT) || (t->type == TOKEN822_DOT)))
- OUTLEFT
- wordok = 0;
- continue;
- case TOKEN822_ATOM: case TOKEN822_QUOTE: case TOKEN822_LITERAL:
- if (!wordok)
- FLUSHCOMMA
- wordok = 0;
- ADDRLEFT
- continue;
- case TOKEN822_COMMENT:
- /* comment is lexically a space; shouldn't affect wordok */
- break;
- case TOKEN822_COMMA:
- FLUSH
- wordok = 1;
- break;
- default:
- wordok = 1;
- ADDRLEFT
- continue;
- }
- OUTLEFT
- }
- FLUSH
- ++t;
- while (t > ta->t)
- if (!token822_append(taout,--t)) return -1;
-
- token822_reverse(taout);
- return 1;
- }
-