home *** CD-ROM | disk | FTP | other *** search
- /*
- rmescseq.c -- Removes ANSI X3.64 escape sequences from input stream.
-
- To build: [g]cc -o rmescseq rmescseq.c
- To use: rmescseq < infile > outfile
-
- Authors: F. da Cruz, J. Altman, Columbia University.
- (Adapted by J. Altman from C-Kermit ckucns.c.)
-
- Copyright (C) 2001,
- Trustees of Columbia University in the City of New York,
- All rights reserved. Terms of use and redistribution as for C-Kermit 7.0:
- ftp://kermit.columbia.edu/kermit/f/COPYING.TXT
- */

#include <unistd.h>

/* Escape-sequence parser state definitions. */

#define ES_NORMAL 0   /* Normal, not in escape sequence */
#define ES_GOTESC 1   /* Current character is ESC */
#define ES_ESCSEQ 2   /* Inside an escape sequence */
#define ES_GOTCSI 3   /* Inside a control sequence */
#define ES_STRING 4   /* Inside DCS, OSC, PM, or APC string */
#define ES_TERMIN 5   /* 1st char of string terminator */

/* Important buffer lengths */

#define ESCBUFLEN 128
#define STRBUFLEN 256

/* Important Control Characters */

#define NUL 0
#define BS  8
#define CAN 24
#define SUB 26
#define ESC 27
#define SP  32
#define CSI 155
#define ST  156

/* The usual boolean values */

#define FALSE 0
#define TRUE  1

/* Parser state; private to this file and carried across calls. */

static int escstate = ES_NORMAL;            /* Current ES_xxx state */
static int esclast = 0;                     /* Index of last char stored in escbuffer[] */
static int strrecv = FALSE;                 /* TRUE while string chars are being recorded */
static unsigned char escbuffer[ESCBUFLEN];  /* [0] is ESC or CSI, sequence starts at [1] */
static unsigned char strbuf[STRBUFLEN];     /* Body of the string being received */
static int strlength = 0;                   /* Number of chars currently in strbuf[] */

/* Start (or restart) a sequence introduced by a 7-bit ESC. */
static void
esc_begin_escape(void)
{
    escstate = ES_GOTESC;
    esclast = 0;                /* Reset buffer pointer */
    escbuffer[0] = ESC;         /* Save in case we have to replay it */
    strrecv = FALSE;            /* Any string in progress is abandoned */
}

/* Start (or restart) a control sequence introduced by an 8-bit CSI. */
static void
esc_begin_csi(void)
{
    escstate = ES_GOTCSI;
    escbuffer[0] = CSI;         /* Save in case we have to replay it */
    escbuffer[1] = '[';         /* ...but store the 7-bit equivalent */
    esclast = 1;
    strrecv = FALSE;            /* Any string in progress is abandoned */
}

/* Apply a backspace received inside a sequence: undo the previous character. */
static void
esc_erase_previous(void)
{
    if ((escstate == ES_GOTCSI || escstate == ES_ESCSEQ) && esclast == 1) {
        escstate = ES_GOTESC;   /* Only the char after ESC was stored */
        esclast = 0;
    } else if (escstate == ES_GOTESC && esclast == 0) {
        escstate = ES_NORMAL;   /* The ESC itself is erased */
    } else if (escstate == ES_TERMIN) {
        escstate = ES_STRING;   /* Forget the possible terminator */
    } else if (escstate == ES_STRING) {
        if (strlength > 0) {
            strlength--;
        } else {
            /*
             * The string introducer itself is erased.  Reset the buffer
             * index and the recording flag as well, so that the state
             * matches a freshly received ESC.
             */
            escstate = ES_GOTESC;
            esclast = 0;
            strrecv = FALSE;
        }
    } else if (esclast > 0) {
        esclast--;
    }
}

/*
 * Handle a C0 control character or an 8-bit CSI received while a sequence
 * is in progress.  Returns the character itself if it must be passed
 * through to the output, otherwise -1.
 */
static int
esc_control_char(unsigned char ch)
{
    if (ch == CAN || ch == SUB) {       /* These cancel the sequence */
        escstate = ES_NORMAL;
        strlength = 0;
        strbuf[0] = 0;
        strrecv = FALSE;
    } else if (ch == BS) {              /* Erases previous character */
        esc_erase_previous();
    } else if (ch == ESC) {
        if (escstate == ES_STRING)
            escstate = ES_TERMIN;       /* May be start of ESC \ terminator */
        else
            esc_begin_escape();         /* Escape sequence was restarted */
    } else if (ch == CSI) {
        esc_begin_csi();                /* Control sequence was restarted */
    } else if (ch != NUL) {
        /*
         * Any other control character is not part of the sequence and is
         * returned to the caller; the sequence stays in progress.  Note
         * that this includes BEL, so a BEL does not end a string.
         */
        return ch;
    }
    return -1;                          /* Absorbed (NUL is ignored) */
}

/*
 * Feeds one character of the incoming data stream to the ANSI X3.64
 * escape-sequence parser.  The parser keeps its state in file-scope
 * variables between calls.
 *
 * Returns:
 *   ch (0..255)  the character is ordinary data, not part of a sequence;
 *   -1           the character was absorbed and no complete escape or
 *                control sequence is available (this is also the result
 *                for every character of a DCS/OSC/PM/APC-style string,
 *                including its terminator);
 *   -2           the character completed an escape or control sequence,
 *                which is now in escbuffer[1..esclast], with the
 *                introducer (ESC or CSI) in escbuffer[0].
 */
int
ansi_x3_64(unsigned char ch)
{
    if (escstate == ES_NORMAL) {        /* Not in an escape sequence */
        if (ch == ESC) {
            esc_begin_escape();
            return -1;
        }
        if (ch == CSI) {
            esc_begin_csi();
            return -1;
        }
        return ch;                      /* Plain data */
    }

    /* We are in an escape sequence... */

    if (ch < SP || ch == CSI)           /* Control character? */
        return esc_control_char(ch);

    /*
     * Put this character in the escape sequence buffer (strings are not
     * stored there).  Indexing starts at 1, and the index is pre-
     * incremented, so the last usable value of esclast before the store
     * is ESCBUFLEN - 2; characters beyond the buffer are dropped.
     */
    if (escstate != ES_STRING && escstate != ES_TERMIN) {
        if (esclast < ESCBUFLEN - 1)
            escbuffer[++esclast] = ch;
    }

    switch (escstate) {
    case ES_GOTESC:                     /* Previous char was ESC */
        switch (ch) {
        case '[':                       /* Left bracket after ESC is CSI */
            escstate = ES_GOTCSI;
            break;
        case '_':                       /* Application Program Command (APC) */
        case 'P':                       /* Device Control String (DCS) */
        case 'Q':                       /* Private Use One (PU1) */
        case 'R':                       /* Private Use Two (PU2) */
        case 'X':                       /* Start of String (SOS) */
        case '^':                       /* Privacy Message (PM) */
        case ']':                       /* Operating System Command (OSC) */
            escstate = ES_STRING;       /* Enter string-absorption state */
            strrecv = TRUE;
            strlength = 0;
            break;
        default:
            if (ch > 057 && ch < 0177) {    /* Final char, '0' thru '~' */
                escstate = ES_NORMAL;
                return -2;
            }
            escstate = ES_ESCSEQ;       /* Intermediate char; more to come */
            break;
        }
        break;

    case ES_ESCSEQ:                     /* In an escape sequence */
        if (ch > 057 && ch < 0177) {    /* Final character is '0' thru '~' */
            escstate = ES_NORMAL;
            return -2;
        }
        break;                          /* Must not run into the string code */

    case ES_GOTCSI:                     /* In a control sequence */
        if (ch > 077 && ch < 0177) {    /* Final character is '@' thru '~' */
            escstate = ES_NORMAL;
            return -2;
        }
        break;                          /* Must not run into the string code */

    case ES_STRING:                     /* Inside a string */
        /* ESC never gets here: control characters were handled above. */
        if (ch == ST) {                 /* C1 String Terminator */
            escstate = ES_NORMAL;
            strrecv = FALSE;
        } else if (strrecv) {
            if (strlength < STRBUFLEN) {
                strbuf[strlength++] = ch;
            } else {
                /*
                 * Buffer overrun: discard what we got and go back to
                 * normal, so the characters that follow are treated as
                 * ordinary data again.
                 */
                strrecv = FALSE;
                strlength = 0;
                escstate = ES_NORMAL;
            }
        }
        break;                          /* Absorb all other characters */

    case ES_TERMIN:                     /* May have a string terminator */
        if (ch == '\\') {               /* ESC \ ends the string */
            escstate = ES_NORMAL;
            strrecv = FALSE;
        } else {
            /* Just a stray ESC (ch is never a control char here). */
            escstate = ES_STRING;
            if (strrecv && strlength + 1 < STRBUFLEN) {
                strbuf[strlength++] = ESC;
                strbuf[strlength++] = ch;
            }
        }
        break;

    default:
        break;
    }
    return -1;
}
-
- int
- main(argc, argv) int argc; char ** argv; {
- unsigned char ch;
- int n;
-
- while (1) {
- n = read(0, &ch, 1);
- if (n <= 0)
- return;
-
- if ((ch > 127) && (ch < 160) && /* It's a C1 character */
- ch != CSI) { /* But not CSI */
- n = ansi_x3_64(ESC); /* Convert to C0 form */
- if (n >= 0) {
- ch = (n & 0xFF);
- write(1, &ch, 1);
- }
- ch = (ch & 0x7F) | 0x40;
- }
- n = ansi_x3_64(ch);
- if (n >= 0) {
- ch = (n & 0xFF);
- write(1, &ch, 1);
- }
- }
- }
-