home *** CD-ROM | disk | FTP | other *** search
- /* rast.c
- Translate sgmls output to RAST result format.
-
- Written by James Clark (jjc@jclark.com). */
-
- #include "config.h"
- #include "std.h"
- #include "sgmls.h"
- #include "getopt.h"
-
/* P(parms) wraps the parameter list of a forward declaration so that
   the same declaration works with and without prototype support:
   with USE_PROTOTYPES the list is kept, otherwise it collapses to ().  */
#ifdef USE_PROTOTYPES
#define P(parms) parms
#else
#define P(parms) ()
#endif

/* NO_RETURN marks a function that never returns to its caller.
   GCC 2.5 and later (and compatible compilers that define __GNUC__)
   spell this with the noreturn attribute; the `volatile' spelling is
   only meaningful to earlier GCC releases and is silently ignored by
   current compilers, so it is kept solely for those old releases.  */
#if defined(__GNUC__) \
    && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 5))
#define NO_RETURN __attribute__((__noreturn__))
#elif defined(__GNUC__)
#define NO_RETURN volatile
#else
#define NO_RETURN /* as nothing */
#endif

/* VP(parms) is the variadic counterpart of P: when the old <varargs.h>
   interface is in use (VARARGS) no prototype can be written at all.  */
#ifdef VARARGS
#define VP(parms) ()
#else
#define VP(parms) P(parms)
#endif

/* ISASCII(c) guards calls to the <ctype.h> classification functions;
   it is always true unless USE_ISASCII asks for a real isascii() test.  */
#ifdef USE_ISASCII
#define ISASCII(c) isascii(c)
#else
#define ISASCII(c) (1)
#endif
-
- NO_RETURN void error VP((char *,...));
-
- static void input_error P((int, char *, unsigned long));
- static int do_file P((FILE *));
- static void usage P((void));
-
- static void output_processing_instruction P((char *, unsigned));
- static void output_data P((struct sgmls_data *, int));
- static void output_data_lines P((char *, unsigned));
- static void output_internal_sdata P((char *, unsigned));
- static void output_external_entity P((struct sgmls_external_entity *));
- static void output_external_entity_info P((struct sgmls_external_entity *));
- static void output_element_start P((char *, struct sgmls_attribute *));
- static void output_element_end P((char *));
- static void output_attribute P((struct sgmls_attribute *));
- static void output_tokens P((char **, int));
- static void output_markup_chars P((char *, unsigned));
- static void output_markup_string P((char *));
- static void output_char P((int, int));
- static void output_flush P((int));
- static void output_external_id P((char *, char *));
- static void output_entity P((struct sgmls_entity *));
- static void output_external_entity_info P((struct sgmls_external_entity *));
- static void output_internal_entity P((struct sgmls_internal_entity *));
-
/* Close a partially written line using the markup delimiter `!' or
   the data delimiter `|' respectively.  */
#define output_flush_markup() output_flush('!')
#define output_flush_data() output_flush('|')

/* Stream that receives the translated output.  */
static FILE *outfp;
/* Number of characters written so far on the current delimited line;
   zero means no line is open.  */
static int char_count = 0;
/* argv[0], used as a prefix for diagnostics.  */
static char *program_name;
-
- int main(argc, argv)
- int argc;
- char **argv;
- {
- int c;
- int opt;
- char *output_file = 0;
-
- program_name = argv[0];
-
- while ((opt = getopt(argc, argv, "o:")) != EOF)
- switch (opt) {
- case 'o':
- output_file = optarg;
- break;
- case '?':
- usage();
- default:
- abort();
- }
-
- if (output_file) {
- errno = 0;
- outfp = fopen(output_file, "w");
- if (!outfp)
- error("couldn't open `%s' for output: %s", strerror(errno));
- }
- else {
- outfp = tmpfile();
- if (!outfp)
- error("couldn't create temporary file: %s", strerror(errno));
- }
-
- if (argc - optind > 1)
- usage();
-
- if (argc - optind == 1) {
- if (!freopen(argv[optind], "r", stdin))
- error("couldn't open `%s' for input: %s", argv[optind], strerror(errno));
- }
-
- (void)sgmls_set_errhandler(input_error);
-
- if (!do_file(stdin)) {
- fclose(outfp);
- if (output_file) {
- if (!freopen(output_file, "w", stdout))
- error("couldn't reopen `%s' for output: %s", strerror(errno));
- }
- fputs("#ERROR\n", stdout);
- exit(EXIT_FAILURE);
- }
-
- if (output_file) {
- errno = 0;
- if (fclose(outfp) == EOF)
- error("error closing `%s': %s", output_file, strerror(errno));
- }
- else {
- errno = 0;
- if (fseek(outfp, 0L, SEEK_SET))
- error("couldn't rewind temporary file: %s", strerror(errno));
- while ((c = getc(outfp)) != EOF)
- if (putchar(c) == EOF)
- error("error writing standard output: %s", strerror(errno));
- }
- exit(EXIT_SUCCESS);
- }
-
- static
- void usage()
- {
- fprintf(stderr, "usage: %s [-o output_file] [input_file]\n", program_name);
- exit(EXIT_FAILURE);
- }
-
- static
- int do_file(fp)
- FILE *fp;
- {
- struct sgmls *sp;
- struct sgmls_event e;
- int conforming = 0;
-
- sp = sgmls_create(fp);
- while (sgmls_next(sp, &e))
- switch (e.type) {
- case SGMLS_EVENT_DATA:
- output_data(e.u.data.v, e.u.data.n);
- break;
- case SGMLS_EVENT_ENTITY:
- output_external_entity(e.u.entity);
- break;
- case SGMLS_EVENT_PI:
- output_processing_instruction(e.u.pi.s, e.u.pi.len);
- break;
- case SGMLS_EVENT_START:
- output_element_start(e.u.start.gi, e.u.start.attributes);
- sgmls_free_attributes(e.u.start.attributes);
- break;
- case SGMLS_EVENT_END:
- output_element_end(e.u.end.gi);
- break;
- case SGMLS_EVENT_SUBSTART:
- {
- int level = 1;
- output_external_entity(e.u.entity);
- while (level > 0) {
- if (!sgmls_next(sp, &e))
- return 0;
- switch (e.type) {
- case SGMLS_EVENT_SUBSTART:
- level++;
- break;
- case SGMLS_EVENT_SUBEND:
- level--;
- break;
- case SGMLS_EVENT_START:
- sgmls_free_attributes(e.u.start.attributes);
- break;
- default:
- /* prevent compiler warnings */
- break;
- }
- }
- }
- break;
- case SGMLS_EVENT_APPINFO:
- break;
- case SGMLS_EVENT_CONFORMING:
- conforming = 1;
- break;
- default:
- abort();
- }
- sgmls_free(sp);
- return conforming;
- }
-
- static
- void output_processing_instruction(s, len)
- char *s;
- unsigned len;
- {
- fputs("[?", outfp);
- if (len > 0) {
- putc('\n', outfp);
- output_data_lines(s, len);
- output_flush_data();
- }
- fputs("]\n", outfp);
- }
-
- static
- void output_data(v, n)
- struct sgmls_data *v;
- int n;
- {
- int i;
- for (i = 0; i < n; i++) {
- if (v[i].is_sdata)
- output_internal_sdata(v[i].s, v[i].len);
- else if (v[i].len > 0)
- output_data_lines(v[i].s, v[i].len);
- }
- }
-
/* Write the N characters at S as data, i.e. delimited by `|', and
   close the last line.  N must be greater than zero.  */

static
void output_data_lines(s, n)
     char *s;
     unsigned n;
{
  char *p = s;

  assert(n > 0);
  while (n != 0) {
    output_char((unsigned char)*p, '|');
    p++;
    n--;
  }
  output_flush_data();
}
-
- static
- void output_internal_sdata(s, n)
- char *s;
- unsigned n;
- {
- fputs("#SDATA-TEXT\n", outfp);
- output_markup_chars(s, n);
- output_flush_markup();
- fputs("#END-SDATA\n", outfp);
- }
-
- static
- void output_external_entity(e)
- struct sgmls_external_entity *e;
- {
- fprintf(outfp, "[&%s\n", e->name);
- output_external_entity_info(e);
- fputs("]\n", outfp);
- }
-
- static
- void output_element_start(gi, att)
- char *gi;
- struct sgmls_attribute *att;
- {
- fprintf(outfp, "[%s", gi);
- if (att) {
- struct sgmls_attribute *p;
- putc('\n', outfp);
- for (p = att; p; p = p->next)
- output_attribute(p);
- }
- fputs("]\n", outfp);
- }
-
- static
- void output_element_end(gi)
- char *gi;
- {
- fprintf(outfp, "[/%s]\n", gi);
- }
-
- static
- void output_attribute(p)
- struct sgmls_attribute *p;
- {
- fprintf(outfp, "%s=\n", p->name);
- switch (p->type) {
- case SGMLS_ATTR_IMPLIED:
- fputs("#IMPLIED\n", outfp);
- break;
- case SGMLS_ATTR_CDATA:
- {
- struct sgmls_data *v = p->value.data.v;
- int n = p->value.data.n;
- int i;
- for (i = 0; i < n; i++)
- if (v[i].is_sdata)
- output_internal_sdata(v[i].s, v[i].len);
- else {
- output_markup_chars(v[i].s, v[i].len);
- output_flush_markup();
- }
- }
- break;
- case SGMLS_ATTR_TOKEN:
- output_tokens(p->value.token.v, p->value.token.n);
- break;
- case SGMLS_ATTR_ENTITY:
- {
- int i;
- for (i = 0; i < p->value.entity.n; i++) {
- struct sgmls_entity *e = p->value.entity.v[i];
- char *name;
-
- if (e->is_internal)
- name = e->u.internal.name;
- else
- name = e->u.external.name;
- if (i > 0)
- output_markup_string(" ");
- output_markup_string(name);
- }
- output_flush_markup();
- for (i = 0; i < p->value.entity.n; i++)
- output_entity(p->value.entity.v[i]);
- }
- break;
- case SGMLS_ATTR_NOTATION:
- output_tokens(&p->value.notation->name, 1);
- output_external_id(p->value.notation->pubid, p->value.notation->sysid);
- break;
- }
- }
-
/* Write the N strings in V as markup, separated by single spaces, and
   close the line.  N must be greater than zero.  */

static void output_tokens(v, n)
     char **v;
     int n;
{
  int i = 0;

  assert(n > 0);
  for (;;) {
    output_markup_string(v[i]);
    if (++i >= n)
      break;
    output_markup_string(" ");
  }
  output_flush_markup();
}
-
/* Write the N characters at S using the markup delimiter `!'.  The
   current line is left open; the caller is expected to flush it.  */

static
void output_markup_chars(s, n)
     char *s;
     unsigned n;
{
  while (n != 0) {
    output_char((unsigned char)*s, '!');
    s++;
    n--;
  }
}
-
/* Write the null-terminated string S using the markup delimiter `!'.
   The current line is left open; the caller is expected to flush it.  */

static
void output_markup_string(s)
     char *s;
{
  for (; *s != '\0'; s++)
    output_char((unsigned char)*s, '!');
}
-
- static
- void output_char(c, delim)
- int c;
- int delim;
- {
- if (ISASCII(c) && isprint(c)) {
- if (char_count == 0)
- putc(delim, outfp);
- putc(c, outfp);
- char_count++;
- if (char_count == 60) {
- putc(delim, outfp);
- putc('\n', outfp);
- char_count = 0;
- }
- }
- else {
- output_flush(delim);
- switch (c) {
- case RECHAR:
- fputs("#RE\n", outfp);
- break;
- case RSCHAR:
- fputs("#RS\n", outfp);
- break;
- case TABCHAR:
- fputs("#TAB\n", outfp);
- break;
- default:
- fprintf(outfp, "#%d\n", c);
- }
- }
- }
-
- static
- void output_flush(delim)
- int delim;
- {
- if (char_count > 0) {
- putc(delim, outfp);
- putc('\n', outfp);
- char_count = 0;
- }
- }
-
- static
- void output_external_id(pubid, sysid)
- char *pubid;
- char *sysid;
- {
- if (!pubid && !sysid)
- fputs("#SYSTEM\n#NONE\n", outfp);
- else {
- if (pubid) {
- fputs("#PUBLIC\n", outfp);
- if (*pubid) {
- output_markup_string(pubid);
- output_flush_markup();
- }
- else
- fputs("#EMPTY\n", outfp);
- }
- if (sysid) {
- fputs("#SYSTEM\n", outfp);
- if (*sysid) {
- output_markup_string(sysid);
- output_flush_markup();
- }
- else
- fputs("#EMPTY\n", outfp);
- }
- }
- }
-
- static
- void output_entity(e)
- struct sgmls_entity *e;
- {
- if (e->is_internal)
- output_internal_entity(&e->u.internal);
- else
- output_external_entity_info(&e->u.external);
- fputs("#END-ENTITY", outfp);
- #ifndef ASIS
- putc('\n', outfp);
- #endif
- }
-
- static
- void output_external_entity_info(e)
- struct sgmls_external_entity *e;
- {
- switch (e->type) {
- case SGMLS_ENTITY_CDATA:
- fputs("#CDATA-EXTERNAL", outfp);
- break;
- case SGMLS_ENTITY_SDATA:
- fputs("#SDATA-EXTERNAL", outfp);
- break;
- case SGMLS_ENTITY_NDATA:
- fputs("#NDATA-EXTERNAL", outfp);
- break;
- case SGMLS_ENTITY_SUBDOC:
- fputs("#SUBDOC", outfp);
- break;
- }
- putc('\n', outfp);
- output_external_id(e->pubid, e->sysid);
- if (e->type != SGMLS_ENTITY_SUBDOC) {
- struct sgmls_attribute *p;
- fprintf(outfp, "#NOTATION=%s\n", e->notation->name);
- output_external_id(e->notation->pubid, e->notation->sysid);
- for (p = e->attributes; p; p = p->next)
- output_attribute(p);
- }
- }
-
- static
- void output_internal_entity(e)
- struct sgmls_internal_entity *e;
- {
- if (e->data.is_sdata)
- fputs("#SDATA-INTERNAL", outfp);
- else
- fputs("#CDATA-INTERNAL", outfp);
- putc('\n', outfp);
- output_markup_chars(e->data.s, e->data.len);
- output_flush_markup();
- }
-
/* Error handler installed with sgmls_set_errhandler(): report the
   message STR together with the input line number LINENO through
   error(), which terminates the program.  NUM is accepted as part of
   the handler signature but is not included in the report.  */

static
void input_error(num, str, lineno)
     int num;
     char *str;
     unsigned long lineno;
{
  error("Error at input line %lu: %s",
	lineno,
	str);
}
-
- NO_RETURN
- #ifdef VARARGS
- void error(va_alist) va_dcl
- #else
- void error(char *message,...)
- #endif
- {
- #ifdef VARARGS
- char *message;
- #endif
- va_list ap;
-
- fprintf(stderr, "%s: ", program_name);
- #ifdef VARARGS
- va_start(ap);
- message = va_arg(ap, char *);
- #else
- va_start(ap, message);
- #endif
- vfprintf(stderr, message, ap);
- va_end(ap);
- fputc('\n', stderr);
- fflush(stderr);
- exit(EXIT_FAILURE);
- }
-