home *** CD-ROM | disk | FTP | other *** search
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-
/* A very stupid sed (i.e. it only supports the substitute
 * command). Usage is:
 *
 *      sub <cmd> [<file list>]
 *
 * where <cmd> is:
 *
 *      [s]/<pat>/<string>[/g]
 *
 * The regular expression <pat> is replaced with <string> on every
 * line of the file where it's found. If the /g is present it's
 * replaced everywhere on the line, else only the first occurrence
 * on the line is replaced. In the replacement <string>:
 *
 *      \e      is mapped to ESC
 *      \t      is mapped to HT
 *      \s      is mapped to ' '
 *      \n      is mapped to LF
 *      \b      is mapped to BS
 *      \&      is mapped to &
 *      \\      is mapped to \
 *      &       is mapped to the string that matched <pat>
 *      \xYY    is mapped to the hex number YY.
 *      \<anything else> is mapped to <anything else>
 */
-
/* File-wide constants, types and helper macros.
 *
 * BSIZE     Size of the line buffer handed to fgets(), so the longest
 *           line read in one piece is BSIZE-1 characters.
 * TOKEN     Element type of the compiled pattern returned by makepat()
 *           and consumed by matchs().
 * HEX_L(c)  Converts c (a hex digit in ASCII, either case) to its
 *           numeric value 0-15. c is evaluated more than once, so it
 *           must not have side effects. The result is meaningless if
 *           c is not a hex digit; callers validate first.
 * HEX_H(c)  Same as HEX_L(c) but puts the result in the high nibble.
 * E(x)      Prints the string x followed by a newline on stderr.
 *           NOTE: the expansion deliberately keeps its trailing
 *           semicolon because existing call sites rely on it (one of
 *           them has no semicolon of its own). Don't use E() as the
 *           unbraced body of an if/else.
 */

#define BSIZE 256 /* Max input line size */

typedef char TOKEN;

/* The casts to unsigned char keep the <ctype.h> calls well defined
 * when plain char is signed and c holds a value above 0x7f.
 */
#define HEX_L(c) ( isdigit((unsigned char)(c)) \
                        ? (c) - '0' \
                        : toupper((unsigned char)(c)) - 'A' + 10 )
#define HEX_H(c) ( HEX_L(c) << 4 )
#define E(x)     fprintf(stderr, "%s\n", (x));


/*------------------------------------------------------------------*/

/* Pattern-matching routines defined outside this file. As used here:
 *
 * makepat(str, delim)      returns a pattern template for the
 *                          expression at str, or a null pointer if
 *                          the expression is illegal.
 * matchs(str, pat, ret_end) returns a pointer into str for the match
 *                          of pat (the start of the match when
 *                          ret_end is 0, its last character when
 *                          ret_end is 1), or a null pointer when
 *                          there is no match.
 */
extern TOKEN *makepat( char *, int );
extern char  *matchs ( char *, TOKEN *, int );
-
- /*------------------------------------------------------------------*/
-
- TOKEN *getpat( pp, isglobal )
- char **pp;
- int *isglobal;
- {
- /* Return a pointer to the pattern template for string.
- * Update isglobal to true if /g is found at the
- * end of the string. Update *pp to point at the replacement
- * string.
- */
-
- register int delim;
- register char *p = *pp ;
- TOKEN *pat ;
-
- if( *p == 's' ) /* Extract the expression: */
- p++; /* Skip the 's' if it's there */
- delim = *p++ ; /* Get the delimiter and skip past it */
-
-
- if( !(pat = makepat(p, delim)) )
- {
- fprintf(stderr, "Sub: Illegal pattern\n");
- exit( 1 );
- }
-
-
- while( *p && *p != delim ) /* Skip past the pattern */
- p++;
- if( *p ) /* And the delimiter */
- p++;
-
- *pp = p; /* remember the replacement str */
-
- while( *p && *p != delim ) /* get rid of the second delim */
- p++;
-
- if( *p )
- *p++ = '\0'; /* by replacing it with a null */
- else
- *p = '\0';
-
- *isglobal = *p == 'g' ;
- return pat;
- }
-
- /*------------------------------------------------------------------*/
-
- int subst( src, pat, replacement, global )
- TOKEN *pat;
- char *src, *replacement;
- {
- /* Print the string with any replacement made. Substitute
- * & in the replacement string with the src pattern.
- * Expand:
- * \& in the replacement string to '&'
- * \s in the replacement string to a space
- * \t in the replacement string to a tab
- * \n in the replacement string to a newline.
- *
- * If global is true the routine is applied recursively
- * to the part of src that follows a matched pattern.
- */
-
- register char *p, *start, *end;
-
- p = src + (strlen(src)-1); /* Remove \n from the end of the */
- if( *p == '\n' ) /* the line if one's there. */
- *p = '\0';
-
- if( !(start = matchs(src, pat, 0)) )
- {
- puts( src ); /* No match found */
- return( 0 ); /* Just print string */
- }
-
- end = matchs(start, pat, 1);
-
- while( src < start )
- fputchar( *src++ );
-
- print_replacement( replacement, start, end );
-
-
- if( *end ) /* Increment end to point at the start of the */
- end++; /* tail rather than the end of the match string */
-
- if( *end && global )
- return( 1 + subst(end, pat, replacement, global) );
-
- puts( end );
- return( 1 );
- }
-
- /*----------------------------------------------------------------------*/
-
-
/* Print the replacement string on stdout.
 *
 * rep    The replacement string. In it:
 *            \b    is mapped to BS
 *            \e    is mapped to ESC
 *            \n    is mapped to LF
 *            \s    is mapped to ' '
 *            \t    is mapped to HT
 *            \xYY  is mapped to the character with hex code YY
 *            \<anything else> is mapped to <anything else>
 *                  (so \& is a literal & and \\ a literal \)
 *            &     is mapped to the matched text
 * start  First character of the matched text.
 * end    Last character of the matched text (inclusive).
 *
 * Malformed escapes are printed literally instead of reading past the
 * end of rep: a backslash that is the last character prints as '\',
 * and \x not followed by two hex digits prints as 'x'.
 *
 * Returns the number of characters written.
 */
int print_replacement( char *rep, char *start, char *end )
{
    char *p;
    char  hex[3];
    int   written = 0;

    for( ; *rep ; rep++ )
    {
        if( *rep == '&' )
        {
            /* Copy the matched text; stop early at a NUL in case
             * end lies beyond the terminator.
             */
            for( p = start; *p && p <= end ; p++ )
            {
                putchar( *p );
                written++;
            }
            continue;
        }

        if( *rep != '\\' )
        {
            putchar( *rep );
            written++;
            continue;
        }

        if( rep[1] == '\0' )        /* Backslash ends the string: */
        {                           /* print it and stop before   */
            putchar( '\\' );        /* stepping over the NUL      */
            written++;
            break;
        }

        switch( *++rep )
        {
        case 'b':  putchar( '\b' );  break;
        case 'e':  putchar( 0x1b );  break;
        case 'n':  putchar( '\n' );  break;
        case 's':  putchar( ' '  );  break;
        case 't':  putchar( '\t' );  break;

        case 'x':
            /* The && keeps rep[2] from being examined when rep[1]
             * is already the terminator.
             */
            if( isxdigit((unsigned char)rep[1]) &&
                isxdigit((unsigned char)rep[2]) )
            {
                hex[0] = rep[1];
                hex[1] = rep[2];
                hex[2] = '\0';
                putchar( (int)strtol(hex, NULL, 16) );
                rep += 2;
            }
            else
                putchar( 'x' );
            break;

        default:   putchar( *rep );  break;
        }
        written++;
    }

    return written;
}
-
- /*----------------------------------------------------------------------*/
-
/* Print the help text on stderr, one line per table entry, and exit
 * with status 1. Never returns; the int return type is kept only so
 * existing callers and declarations stay valid.
 *
 * An entry that itself ends in '\n' is followed by a blank line.
 */
int usage( void )
{
    static const char *const text[] =
    {
        "Usage: sub <command> [<file list>]\n",
        "Scan through the file list (or standard input if no",
        "files are listed), substituting all matches of a regular",
        "expression specified in <command> with an alternate pattern",
        "The <command> syntax is:",
        " [s]/<pat>/<str>/[g]\n",
        "The leading s, if present, is ignored. If the trailing g is there,",
        "all (rather than just the first) possible substitutions are made",
        "on a line. The delimiter (/ above) can be any character. <pat> is",
        "a grep-like regular expression (type \"grep --\" for more details",
        "about regular expressions). <str> is the string that will replace",
        "<pat> in the output file. It can be any ASCII string but the ",
        "following are treated specially when found in a <str>:",
        "",
        "\\e is mapped to ESC \\t is mapped to HT \\s is mapped to ' '",
        "\\n is mapped to LF \\b is mapped to BS \\& is mapped to &",
        "\\xDD is mapped to the hex number DD.",
        "\\<anything else> is mapped to <anything else>",
        "& is mapped to the string that matched <pat>"
    };
    size_t i;

    for( i = 0; i < sizeof text / sizeof text[0]; i++ )
        fprintf( stderr, "%s\n", text[i] );

    exit( 1 );
}
-
- /*------------------------------------------------------------------*/
-
- main( argc, argv )
- char **argv;
- {
- register int numsubst = 0;
- char *p ;
- int isglobal = 0;
- TOKEN *pat ;
- int delim ;
- FILE *fp = stdin ;
- static char buf[BSIZE] ;
- static int use_stdin ;
-
- ctlc();
- reargv( &argc, &argv );
-
- E("SUB: Copyright (c) 1986, Allen I. Holub. All rights reserved\n");
-
-
- if( argc < 2 || (argc > 1 && argv[1][0] == '-') )
- usage();
-
- use_stdin = (--argc==1); /* Use standard input as the input */
- /* stream if argc == 1. */
-
- p = *++argv;
- pat = getpat( &p, &isglobal );
-
-
- if( use_stdin )
- while( fgets(buf, BSIZE, stdin) )
- numsubst += subst(buf, pat, p, isglobal );
-
- else for( ++argv; --argc > 0 ; argv++ )
- {
- if( !(fp = fopen(*argv, "r")) )
- {
- fprintf(stderr,"Can't open %s\n", *argv );
- exit( 1 );
- }
-
- while( fgets(buf, BSIZE, fp) )
- numsubst += subst(buf, pat, p, isglobal );
-
- fclose( fp );
- }
-
-
- fprintf(stderr, "\n%d substitutions made\n", numsubst );
- }
-