home *** CD-ROM | disk | FTP | other *** search
/ OS/2 Shareware BBS: 5 Edit / 05-Edit.zip / SED.ZIP / SEDCOMP.C < prev    next >
C/C++ Source or Header  |  1989-07-30  |  38KB  |  823 lines

  1. /* sedcomp.c -- stream editor main and compilation phase
  2.  
  3.    The stream editor compiles its command input  (from files or -e options)
  4. into an internal form using compile() then executes the compiled form using
  5. execute(). Main() just initializes data structures, interprets command line
  6. options, and calls compile() and execute() in appropriate sequence.
  7.    The data structure produced by compile() is an array of compiled-command
  8. structures (type sedcmd).  These contain several pointers into pool[], the
  9. regular-expression and text-data pool, plus a command code and g & p flags.
  10. In the special case that the command is a label the struct  will hold a ptr
  11. into the labels array labels[] during most of the compile,  until resolve()
  12. resolves references at the end.
  13.    The operation of execute() is described in its source module.
  14.  
  15. ==== Written for the GNU operating system by Eric S. Raymond ==== */
  16.  
  17. #include <stdio.h>              /* uses getc, fprintf, fopen, fclose */
  18. #include "sed.h"                /* command type struct and name defines */
  19. #include "sed.dcl"
  20. /* imported functions */
  21. extern int strcmp();            /* test strings for equality */
  22. extern void execute();          /* execute compiled command */
  23.  
  24. /***** public stuff ******/
  25.  
  26. #define MAXCMDS         200     /* maximum number of compiled commands */
  27. #define MAXLINES        256     /* max # numeric addresses to compile */
  28.  
  29. /* main data areas */
      /* linebuf doubles as the compiler's input: cp starts out pointing at it,
       * and ABORT passes it as the argument of every error-message format. */
  30. char    linebuf[MAXBUF+1];      /* current-line buffer */
      /* cmds has one slot more than MAXCMDS; compile() and cmdcomp() abort as
       * soon as cmdp reaches cmds + MAXCMDS. */
  31. sedcmd  cmds[MAXCMDS+1];        /* hold compiled commands */
  32. long    linenum[MAXLINES];      /* numeric-addresses table */
  33.  
  34. /* miscellaneous shared variables */
  35. int     nflag;                  /* -n option flag */
  36. int     eargc;                  /* scratch copy of argument count */
  37. sedcmd  *pending        = NULL; /* next command to be executed */
      /* single-bit masks: recomp() sets bit (c & 7) of byte (c >> 3) in a
       * character-class bitmap with  ep[c >> 3] |= bits[c & 7] */
  38. char    bits[]          = {1,2,4,8,16,32,64,128};
  39.  
  40. /***** module common stuff *****/
  41.  
  42. #define POOLSIZE        10000   /* size of string-pool space */
      /* slot 0 of the w-file table is preset to stdout and the count starts at 1
       * (see cmdcomp), so at most WFILES-1 named files can be opened */
  43. #define WFILES          10      /* max # w output files that can be compiled */
  44. #define RELIMIT         256     /* max chars in compiled RE */
  45. #define MAXDEPTH        20      /* maximum {}-nesting level */
  46. #define MAXLABS         50      /* max # of labels that can be handled */
  47.  
      /* SKIPWS: advance pointer variable pc past blanks and tabs.  Expands to a
       * bare while loop, so use it only as a complete statement. */
  48. #define SKIPWS(pc)      while ((*pc==' ') || (*pc=='\t')) pc++
      /* ABORT: print msg (a printf format; linebuf is supplied as its single
       * argument) on stderr, then exit with status 2.  Never returns. */
  49. #define ABORT(msg)      (fprintf(stderr, msg, linebuf), exit(2))
      /* IFEQ: expands to an unterminated "if (*x == v) x++ ," -- the expression
       * written right after the macro call completes the comma expression and
       * is evaluated only when *x matched v (x is then already advanced). */
  50. #define IFEQ(x, v)      if (*x == v) x++ , /* do expression */
  51.  
  52. /* error messages */
      /* Each string is a printf format.  Those handed to ABORT receive linebuf
       * (the command text being compiled) as their only argument; formats with
       * no conversion simply ignore it.  COCFI, UFLAG and CCOFI are printed
       * with explicit fprintf calls that supply a file name or flag character. */
  53. static char     AGMSG[] = "sed: garbled address %s\n";
  54. static char     CGMSG[] = "sed: garbled command %s\n";
  55. static char     TMTXT[] = "sed: too much text: %s\n";
  56. static char     AD1NG[] = "sed: no addresses allowed for %s\n";
  57. static char     AD2NG[] = "sed: only one address allowed for %s\n";
  58. static char     TMCDS[] = "sed: too many commands, last was %s\n";
  59. static char     COCFI[] = "sed: cannot open command-file %s\n";
  60. static char     UFLAG[] = "sed: unknown flag %c\n";
  61. static char     COOFI[] = "sed: cannot open %s\n";
  62. static char     CCOFI[] = "sed: cannot create %s\n";
  63. static char     ULABL[] = "sed: undefined label %s\n";
  64. static char     TMLBR[] = "sed: too many {'s\n";
  65. static char     FRENL[] = "sed: first RE must be non-null\n";
  66. static char     NSCAX[] = "sed: no such command as %s\n";
  67. static char     TMRBR[] = "sed: too many }'s\n";
  68. static char     DLABL[] = "sed: duplicate label %s\n";
  69. static char     TMLAB[] = "sed: too many labels: %s\n";
  70. static char     TMWFI[] = "sed: too many w files\n";
  71. static char     REITL[] = "sed: RE too long: %s\n";
  72. static char     TMLNR[] = "sed: too many line numbers\n";
  73. static char     TRAIL[] = "sed: command \"%s\" has trailing garbage\n";
  74.  
      /* One entry of the label table.  An entry is created either by a label
       * declaration (':') or by the first branch that names a label not yet
       * declared; in the latter case address stays NULL until the declaration
       * is compiled. */
  75. typedef struct lbl_struct               /* represent a command label */
  76. {
  77.         char            *name;          /* the label name */
          /* head of the chain of branch commands that refer to this label
           * before its address is known; the chain is threaded through each
           * command's u.link field (see the 'b'/'t'/'T' case of cmdcomp) */
  78.         sedcmd          *last;          /* it's on the label search list */
  79.         sedcmd          *address;       /* pointer to the cmd it labels */
  80. }
  81. label;
  82.  
  83. /* label handling */
      /* labels[0] is reserved as the list header (lablst): its `last` chain
       * collects branches that name no label, and main() sets its address to
       * the end of the compiled commands.  Real labels start at labels[1]. */
  84. static label    labels[MAXLABS];        /* here's the label table */
  85. static label    *lab    = labels + 1;   /* pointer to current label */
  86. static label    *lablst = labels;       /* header for search list */
  87.  
  88. /* string pool for regular expressions, append text, etc. etc. */
  89. static char     pool[POOLSIZE];                 /* the pool */
      /* fp is the next free byte of the pool (not a FILE pointer); compiled
       * items are written at fp and fp is moved past them */
  90. static char     *fp     = pool;                 /* current pool pointer */
  91. static char     *poolend = pool + POOLSIZE;     /* pointer past pool end */
  92.  
  93. /* compilation state */
  94. static FILE     *cmdf   = NULL;         /* current command source */
  95. static char     *cp     = linebuf;      /* compile pointer */
  96. static sedcmd   *cmdp   = cmds;         /* current compiled-cmd ptr */
      /* lastre is substituted whenever an empty RE is compiled */
  97. static char     *lastre = NULL;         /* old RE pointer */
  98. static int      bdepth  = 0;            /* current {}-nesting level */
  99. static int      bcount  = 0;            /* # tagged patterns in current RE */
  100. static char     **eargv;                /* scratch copy of argument list */
  101.  
  102. /* compilation flags */
       /* eflag: set positive by main() around compile(); cmdline() switches it
        * to -1 once it has started on the pending -e argument */
  103. static int      eflag;                  /* -e option flag */
  104. static int      gflag;                  /* -g option flag */
  105.  
  106.  
  107. int main(argc, argv)
  108. /* main sequence of the stream editor
        *
        * Scans the leading '-' options: -e and -f each compile a script (taken
        * by cmdline() from the command line, or read from the named file), -g
        * and -n only set flags.  If the options compiled no command at all,
        * compile() is run once more in -e mode with the argument pointer backed
        * up by one, so that the argument at which scanning stopped serves as
        * the script.  Label references are then resolved and execute() is
        * called once per remaining argument, or once with NULL if none remain.
        *
        * Exit status: 0 on success (and when called without arguments),
        * 2 on a missing or unreadable -f file or on any compile error.
        */
  109. int     argc;
  110. char    *argv[];
  111. {
  112.         void compile(), resolve();
  113.  
  114.         eargc   = argc;         /* set local copy of argument count */
  115.         eargv   = argv;         /* set local copy of argument list */
  116.         cmdp->addr1 = pool;     /* 1st addr expand will be at pool start */
  117.         if (eargc == 1)
  118.                 exit(0);        /* exit immediately if no arguments */
  119. PASS("main(): setup");
  120.         /* scan through the arguments, interpreting each one */
  121.         while ((--eargc > 0) && (**++eargv == '-'))
  122.                 switch (eargv[0][1])
  123.                 {
  124.                 case 'e':
  125.                         eflag++; compile();     /* compile with e flag on */
  126.                         eflag = 0;
  127.                         continue;               /* get another argument */
  128.                 case 'f':
                               /* eargc still counts the -f itself here, so it
                                * must be decremented BEFORE the test: with a
                                * post-decrement a trailing -f slipped through
                                * and argv[argc] (NULL) was handed to fopen */
  129.                         if (--eargc <= 0)       /* barf if no -f file */
  130.                                 exit(2);
  131.                         if ((cmdf = fopen(*++eargv, "r")) == NULL)
  132.                         {
  133.                                 fprintf(stderr, COCFI, *eargv);
  134.                                 exit(2);
  135.                         }
  136.                         compile();      /* file is O.K., compile it */
  137.                         fclose(cmdf);
  138.                         continue;       /* go back for another argument */
  139.                 case 'g':
  140.                         gflag++;        /* set global flag on all s cmds */
  141.                         continue;
  142.                 case 'n':
  143.                         nflag++;        /* no print except on p flag or w */
  144.                         continue;
  145.                 default:
                               /* diagnostics belong on stderr like every other
                                * message in this module; scanning goes on */
  146.                         fprintf(stderr, UFLAG, eargv[0][1]);
  147.                         continue;
  148.                 }
  149.  
  150. PASS("main(): argscan");
  151.  
  152.         if (cmdp == cmds)       /* no commands have been compiled */
  153.         {
                       /* back up one argument so the -e reader picks up the
                        * argument we stopped at, then restore the position */
  154.                 eargv--; eargc++;
  155.                 eflag++; compile(); eflag = 0;
  156.                 eargv++; eargc--;
  157.         }
  158.  
  159.         if (bdepth)     /* we have unbalanced squigglies */
  160.                 ABORT(TMLBR);
  161.  
  162.         lablst->address = cmdp; /* set up header of label linked list */
  163.         resolve();              /* resolve label table indirections */
  164. PASS("main(): resolve");
  165.         if (eargc <= 0)         /* if there were no -e commands */
  166.                 execute(NULL);  /*   execute commands from stdin only */
  167.         else while(--eargc>=0)  /* else execute only -e commands */
  168.                 execute(*eargv++);
  169. PASS("main(): end & exit OK");
  170.         exit(0);                /* everything was O.K. if we got here */
  171. }
  172.  
  173.  
       /* H marks commands that need command-specific compilation: compile()
        * calls cmdcomp() only when the table entry has this bit set, and strips
        * it (ccode & ~H) to obtain the command code stored in the sedcmd. */
  174. #define H       0x80    /* 128 bit, on if there's really code for command */
  175. #define LOWCMD  56      /* = '8', lowest char indexed in cmdmask */
  176.  
  177. /* indirect through this to get command internal code, if it exists */
       /* Indexed by (command character - LOWCMD); each row below covers eight
        * consecutive characters: '8'..'?', '@'..'G', 'H'..'O', 'P'..'W',
        * 'X'..'_', '`'..'g', 'h'..'o', 'p'..'w', 'x'..DEL.  A zero entry means
        * "no such command".  ':' and '}' carry H alone (command code 0), and
        * '{' shares BCMD with 'b'. */
  178. static char     cmdmask[] =
  179. {
  180.         0,      0,      H,      0,      0,      H+EQCMD,0,      0,
  181.         0,      0,      0,      0,      H+CDCMD,0,      0,      CGCMD,
  182.         CHCMD,  0,      0,      0,      0,      0,      CNCMD,  0,
  183.         CPCMD,  0,      0,      0,      H+CTCMD,0,      0,      H+CWCMD,
  184.         0,      0,      0,      0,      0,      0,      0,      0,
  185.         0,      H+ACMD, H+BCMD, H+CCMD, DCMD,   0,      0,      GCMD,
  186.         HCMD,   H+ICMD, 0,      0,      H+LCMD, 0,      NCMD,   0,
  187.         PCMD,   H+QCMD, H+RCMD, H+SCMD, H+TCMD, 0,      0,      H+WCMD,
  188.         XCMD,   H+YCMD, 0,      H+BCMD, 0,      H,      0,      0,
  189. };
  190.  
  191. static void compile()
  192. /* precompile sed commands out of a file
        *
        * Fetches command text through cmdline() into linebuf and, for every
        * command found, compiles up to two addresses into the pool, notes a
        * '!' negation, looks the command character up in cmdmask[] and, when
        * the entry has the H bit, calls cmdcomp() for the command-specific
        * part.  The result is stored in *cmdp, which is advanced after each
        * completed command.  Returns when cmdline() yields a negative value;
        * every error terminates the program through ABORT.
        */
  193. {
  194.         char            ccode, *address();
  195.  
  196. PASS("compile(): entry");
  197.  
  198.         for(;;)                                 /* main compilation loop */
  199.         {
                       /* a ';' at *cp means another command is still waiting on
                        * the current line, so no new text is fetched then */
  200.                 if (*cp != ';')                 /* get a new command line */
  201.                         if (cmdline(cp = linebuf) < 0)
  202.                                 break;
  203.                 SKIPWS(cp);
  204.                 if (*cp=='\0' || *cp=='#')      /* a comment */
  205.                         continue;
  206.                 if (*cp == ';')                 /* ; separates cmds */
  207.                 {
  208.                         cp++;
  209.                         continue;
  210.                 }
  211.  
  212.                 /* compile first address */
                       /* address() is defined elsewhere.  As used here it
                        * returns BAD on a malformed address, NULL when (it
                        * appears) no address is present, its argument unchanged
                        * for an empty RE, and otherwise the new pool pointer. */
  213.                 if (fp > poolend)
  214.                         ABORT(TMTXT);
  215.                 else if ((fp = address(cmdp->addr1 = fp)) == BAD)
  216.                         ABORT(AGMSG);
  217.  
  218.                 if (fp == cmdp->addr1)          /* if empty RE was found */
  219.                 {
  220.                         if (lastre)             /* if there was previous RE */
  221.                                 cmdp->addr1 = lastre;   /* use it */
  222.                         else
  223.                                 ABORT(FRENL);
  224.                 }
  225.                 else if (fp == NULL)            /* if fp was NULL */
  226.                 {
  227.                         fp = cmdp->addr1;       /* use current pool location */
  228.                         cmdp->addr1 = NULL;
  229.                 }
  230.                 else
  231.                 {
  232.                         lastre = cmdp->addr1;
  233.                         if (*cp == ',' || *cp == ';')   /* there's 2nd addr */
  234.                         {
  235.                                 cp++;
  236.                                 if (fp > poolend) ABORT(TMTXT);
  237.                                 fp = address(cmdp->addr2 = fp);
  238.                                 if (fp == BAD || fp == NULL) ABORT(AGMSG);
                                       /* empty second RE: reuse the previous one */
  239.                                 if (fp == cmdp->addr2)
  240.                                         cmdp->addr2 = lastre;
  241.                                 else
  242.                                         lastre = cmdp->addr2;
  243.                         }
  244.                         else
  245.                                 cmdp->addr2 = NULL;     /* no 2nd address */
  246.                 }
  247.                 if (fp > poolend) ABORT(TMTXT);
  248.  
  249.                 SKIPWS(cp);             /* discard whitespace after address */
                       /* IFEQ expands to "if (*cp == '!') cp++ , <assignment>;" */
  250.                 IFEQ(cp, '!') cmdp->flags.allbut = 1;
  251.  
  252.                 SKIPWS(cp);             /* get cmd char, range-check it */
                       /* the range test keeps the cmdmask[] index inside the table */
  253.                 if ((*cp < LOWCMD) || (*cp > '~')
  254.                         || ((ccode = cmdmask[*cp - LOWCMD]) == 0))
  255.                                 ABORT(NSCAX);
  256.  
  257.                 cmdp->command = ccode & ~H;     /* fill in command value */
  258.                 if ((ccode & H) == 0)           /* if no compile-time code */
  259.                         cp++;                   /* discard command char */
                       /* a nonzero return from cmdcomp() means this iteration
                        * is finished: cmdp is not advanced here and the
                        * trailing-text check below is skipped */
  260.                 else if (cmdcomp(*cp++))        /* execute it; if ret = 1 */
  261.                         continue;               /* skip next line read */
  262.  
  263.                 if (++cmdp >= cmds + MAXCMDS) ABORT(TMCDS);
  264.  
  265.                 SKIPWS(cp);                     /* look for trailing stuff */
                       /* if text remains, step one character forward: carry on
                        * when the NEXT character is ';', otherwise abort unless
                        * the character just stepped over was '#' */
  266.                 if (*cp != '\0')
  267.                         if (*++cp == ';')
  268.                                 continue;
  269.                         else if (cp[-1] != '#')
  270.                                 ABORT(TRAIL);
  271.         }
  272. }
  273.  
  274. static int cmdcomp(cchar)
  275. /* compile a single command
        *
        * Does the command-specific part of compilation for the command whose
        * character is cchar; cp points just past that character and *cmdp is
        * the slot being filled.  Text, REs and file names go into the pool at
        * fp.
        *
        * Returns 0 when the slot at *cmdp has been filled and the caller should
        * advance cmdp; returns 1 when the caller must not do so ('{' advances
        * cmdp itself, '}' and ':' use no slot).  Errors end the program via
        * ABORT or exit(2).
        */
  276. register char   cchar;          /* character name of command */
  277. {
  278.         char            *gettext(), *rhscomp(), *recomp(), *ycomp();
  279.         static sedcmd   **cmpstk[MAXDEPTH];     /* current cmd stack for {} */
  280.         static char     *fname[WFILES];         /* w file name pointers */
  281.         static FILE     *fout[WFILES]={stdout}; /* w file file ptrs */
  282.         static int      nwfiles = 1;            /* count of open w files */
  283.         int             i;                      /* indexing dummy used in w */
  284.         sedcmd          *sp1, *sp2;             /* temps for label searches */
  285.         label           *lpt, *search();        /* ditto, and the searcher */
  286.         char            redelim;                /* current RE delimiter */
  287.  
  288.         switch(cchar)
  289.         {
  290.         case '{':       /* start command group */
                       /* cmpstk holds MAXDEPTH entries; refuse deeper nesting
                        * instead of writing past the end of the stack */
                       if (bdepth >= MAXDEPTH) ABORT(TMLBR);
  291.                 cmdp->flags.allbut = !cmdp->flags.allbut;
                       /* remember where to store the jump target; the matching
                        * '}' fills it in */
  292.                 cmpstk[bdepth++] = &(cmdp->u.link);
  293.                 if (++cmdp >= cmds + MAXCMDS) ABORT(TMCDS);
  294.                 if (*cp == '\0') *cp = ';';     /* get next cmd w/o lineread */
  295.                 return(1);
  296.  
  297.         case '}':       /* end command group */
  298.                 if (cmdp->addr1) ABORT(AD1NG);  /* no addresses allowed */
  299.                 if (--bdepth < 0) ABORT(TMRBR); /* too many right braces */
  300.                 *cmpstk[bdepth] = cmdp;         /* set the jump address */
  301.                 return(1);
  302.  
  303.         case '=':                       /* print current source line number */
  304.         case 'q':                       /* exit the stream editor */
  305.                 if (cmdp->addr2) ABORT(AD2NG);
  306.                 break;
  307.  
  308.         case ':':       /* label declaration */
  309.                 if (cmdp->addr1) ABORT(AD1NG);  /* no addresses allowed */
  310.                 fp = gettext(lab->name = fp);   /* get the label name */
  311.                 if (lpt = search(lab))          /* does it have a double? */
  312.                 {
                               /* an entry without address was created by an
                                * earlier forward branch; only a second
                                * declaration is an error */
  313.                         if (lpt->address) ABORT(DLABL); /* yes, abort */
  314.                 }
  315.                 else    /* check that it doesn't overflow label table */
  316.                 {
  317.                         lab->last = NULL;
  318.                         lpt = lab;
  319.                         if (++lab >= labels + MAXLABS) ABORT(TMLAB);
  320.                 }
  321.                 lpt->address = cmdp;
  322.                 return(1);
  323.  
  324.         case 'b':       /* branch command */
  325.         case 't':       /* branch-on-succeed command */
  326.         case 'T':       /* branch-on-fail command */
  327.                 SKIPWS(cp);
  328.                 if (*cp == '\0')        /* if branch is to start of cmds... */
  329.                 {
  330.                         /* append current command to the chain headed by
                                * lablst->last (branches that name no label) */
  331.                         if (sp1 = lablst->last)
  332.                         {
  333.                                 while(sp2 = sp1->u.link)
  334.                                         sp1 = sp2;
  335.                                 sp1->u.link = cmdp;
  336.                         }
  337.                         else    /* lablst->last == NULL */
  338.                                 lablst->last = cmdp;
  339.                         break;
  340.                 }
  341.                 fp = gettext(lab->name = fp);   /* else get label into pool */
  342.                 if (lpt = search(lab))          /* enter branch to it */
  343.                 {
  344.                         if (lpt->address)
  345.                                 cmdp->u.link = lpt->address;
  346.                         else
  347.                         {
                                       /* label still undeclared: append to
                                        * the chain of branches waiting for it */
  348.                                 sp1 = lpt->last;
  349.                                 while(sp2 = sp1->u.link)
  350.                                         sp1 = sp2;
  351.                                 sp1->u.link = cmdp;
  352.                         }
  353.                 }
  354.                 else            /* matching named label not found */
  355.                 {
  356.                         lab->last = cmdp;       /* add the new label */
  357.                         lab->address = NULL;    /* it's forward of here */
  358.                         if (++lab >= labels + MAXLABS)  /* overflow if last */
  359.                                 ABORT(TMLAB);
  360.                 }
  361.                 break;
  362.  
  363.         case 'a':       /* append text */
  364.         case 'i':       /* insert text */
  365.         case 'r':       /* read file into stream */
  366.                 if (cmdp->addr2) ABORT(AD2NG);
                       /* fall through: the text argument is read the same way */
  367.         case 'c':       /* change text */
  368.                 if ((*cp == '\\') && (*++cp == '\n')) cp++;
  369.                 fp = gettext(cmdp->u.lhs = fp);
  370.                 break;
  371.  
  372.         case 'D':       /* delete current line in hold space */
  373.                 cmdp->u.link = cmds;
  374.                 break;
  375.  
  376.         case 's':       /* substitute regular expression */
  377.                 redelim = *cp++;                /* get delimiter from 1st ch */
  378.                 if ((fp = recomp(cmdp->u.lhs = fp, redelim)) == BAD)
  379.                         ABORT(CGMSG);
  380.                 if (fp == cmdp->u.lhs)          /* if compiled RE zero len */
  381.                         cmdp->u.lhs = lastre;   /*   use the previous one */
  382.                 else                            /* otherwise */
  383.                         lastre = cmdp->u.lhs;   /*   save the one just found */
  384.                 if ((cmdp->rhs = fp) > poolend) ABORT(TMTXT);
  385.                 if ((fp = rhscomp(cmdp->rhs, redelim)) == BAD) ABORT(CGMSG);
  386.                 if (gflag) cmdp->flags.global++;
  387.                 while (*cp == 'g' || *cp == 'p' || *cp == 'P')
  388.                 {
  389.                         IFEQ(cp, 'g') cmdp->flags.global++;
  390.                         IFEQ(cp, 'p') cmdp->flags.print = 1;
  391.                         IFEQ(cp, 'P') cmdp->flags.print = 2;
  392.                 }
                       /* fall through: s may carry a trailing w flag */
  393.  
  394.         case 'l':       /* list pattern space */
  395.                 if (*cp == 'w')
  396.                         cp++;           /* and execute a w command! */
  397.                 else
  398.                         break;          /* s or l is done */
                       /* fall through into the w-file handling */
  399.  
  400.         case 'w':       /* write-pattern-space command */
  401.         case 'W':       /* write-first-line command */
  402.                 if (nwfiles >= WFILES) ABORT(TMWFI);
  403.                 fp=gettext(fname[nwfiles]=fp);  /* filename will be in pool */
  404.                 for(i = nwfiles-1; i >= 0; i--) /* match it in table */
                               /* slot 0 is stdout and has no name (fname[0] is
                                * never assigned), so skip NULL names rather
                                * than hand a null pointer to strcmp */
  405.                         if (fname[i] != NULL && strcmp(fname[nwfiles], fname[i]) == 0)
  406.                         {
  407.                                 cmdp->fout = fout[i];
  408.                                 return(0);
  409.                         }
  410.                 /* if didn't find one, open new out file */
  411.                 if ((cmdp->fout = fopen(fname[nwfiles], "w")) == NULL)
  412.                 {
  413.                         fprintf(stderr, CCOFI, fname[nwfiles]);
  414.                         perror(fname[nwfiles]);
  415.                         exit(2);
  416.                 }
  417.                 fout[nwfiles++] = cmdp->fout;
  418.                 break;
  419.  
  420.         case 'y':       /* transliterate text */
  421.                 fp = ycomp(cmdp->u.lhs = fp, *cp++);    /* compile translit */
  422.                 if (fp == BAD) ABORT(CGMSG);            /* fail on bad form */
  423.                 if (fp > poolend) ABORT(TMTXT);         /* fail on overflow */
  424.                 break;
  425.         }
  426.         return(0);      /* succeeded in interpreting one command */
  427. }
  428.  
  429. static char *rhscomp(rhsp, delim)       /* uses bcount */
  430. /* generate replacement string for substitute command right hand side
        *
        * Copies the replacement text from cp to rhsp up to the first unescaped
        * delim.  A character preceded by a backslash is stored with its 0x80
        * bit set so that it can be told apart from a plain one.
        *
        * Returns a pointer one past the NUL that terminates the stored string
        * and leaves cp just past the delimiter.  Returns BAD if the text ends
        * before the delimiter is seen, if a backslash is the last character of
        * the text, or if an escaped digit is greater than bcount, the number
        * of tagged patterns in the RE just compiled.
        */
  431. register char   *rhsp;          /* place to compile expression to */
  432. register char   delim;          /* regular-expression end-mark to look for */
  433. {
  434.         register char   *p = cp;                /* strictly for speed */
  435.  
  436.         for(;;)
  437.                 if ((*rhsp = *p++) == '\\')     /* copy; if it's a \, */
  438.                 {
  439.                         *rhsp = *p++;           /* copy escaped char */
                               /* a backslash right before the end of the text
                                * escapes nothing; stop here instead of marking
                                * the NUL and scanning on past the terminator */
                               if (*rhsp == '\0')
                                       return(BAD);
  440.                         /* check validity of pattern tag */
  441.                         if (*rhsp > bcount + '0' && *rhsp <= '9')
  442.                                 return(BAD);
  443.                         *rhsp++ |= 0x80;        /* mark the good ones */
  444.                         continue;
  445.                 }
  446.                 else if (*rhsp == delim)        /* found RE end, hooray... */
  447.                 {
  448.                         *rhsp++ = '\0';         /* cap the expression string */
  449.                         cp = p;
  450.                         return(rhsp);           /* pt at 1 past the RE */
  451.                 }
  452.                 else if (*rhsp++ == '\0')       /* last ch not RE end, help! */
  453.                         return(BAD);
  454. }
  455.  
  456. static char *recomp(expbuf, redelim)    /* uses cp, bcount */
  457. /* compile a regular expression to internal form
        *
        * Reads the RE text at cp up to the delimiter redelim and writes its
        * compiled form at expbuf: one leading byte that is nonzero when the RE
        * starts with '^', then a sequence of items (CCHR + char, CDOT, CDOL,
        * CCL + 16-byte bitmap, CBRA/CKET/CBACK + tag number), with the STAR
        * bit ORed into the opcode of a repeated item, closed by CEOF.
        *
        * Returns a pointer just past the compiled RE with cp left after the
        * delimiter.  If the text is empty (first character is the delimiter)
        * nothing is written and expbuf itself is returned.  Returns BAD on a
        * malformed or over-long RE; an over-long or unterminated character
        * class ends the program via ABORT.  bcount is left holding the number
        * of \( groups opened.
        */
  458. char    *expbuf;                        /* place to compile it to */
  459. char    redelim;                        /* RE end-marker to look for */
  460. {
  461.         register char   *ep = expbuf;   /* current-compiled-char pointer */
  462.         register char   *sp = cp;       /* source-character ptr */
  463.         register int    c;              /* current character */
  464.         char            negclass;       /* all-but flag */
  465.         char            *lastep;        /* ptr to last expr compiled */
  466.         char            *svclass;       /* start of current char class */
  467.         char            brnest[MAXTAGS];        /* bracket-nesting array */
  468.         char            *brnestp;       /* ptr to current bracket-nest */
  469.         char            *pp;            /* scratch pointer */
  470.         int             classct;        /* class element count */
  471.         int             tags;           /* # of closed tags */
  472.  
  473.         if (*cp == redelim)             /* if first char is RE endmarker */
  474.                 return(cp++, expbuf);   /* leave existing RE unchanged */
  475.  
  476.         lastep = NULL;                  /* there's no previous RE */
  477.         brnestp = brnest;               /* initialize ptr to brnest array */
  478.         tags = bcount = 0;              /* initialize counters */
  479.  
  480.         if (*ep++ = (*sp == '^'))       /* check for start-of-line syntax */
  481.                 sp++;
  482.  
  483.         for (;;)
  484.         {
  485.                 if (ep >= expbuf + RELIMIT)     /* match is too large */
  486.                         return(cp = sp, BAD);
  487.                 if ((c = *sp++) == redelim)     /* found the end of the RE */
  488.                 {
  489.                         cp = sp;
  490.                         if (brnestp != brnest)  /* \(, \) unbalanced */
  491.                                 return(BAD);
  492.                         *ep++ = CEOF;           /* write end-of-pattern mark */
  493.                         return(ep);             /* return ptr to compiled RE */
  494.                 }
  495.                 if ((c != '*') && (c != '+'))   /* unless we're a postfix op */
  496.                         lastep = ep;            /*   note where this item starts */
  497.  
  498.                 switch (c)
  499.                 {
  500.                 case '\\':
  501.                         if ((c = *sp++) == '(') /* start tagged section */
  502.                         {
  503.                                 if (bcount >= MAXTAGS)
  504.                                         return(cp = sp, BAD);
  505.                                 *brnestp++ = bcount;    /* update tag stack */
  506.                                 *ep++ = CBRA;           /* enter tag-start */
  507.                                 *ep++ = bcount++;       /* bump tag count */
  508.                                 continue;
  509.                         }
  510.                         else if (c == ')')      /* end tagged section */
  511.                         {
  512.                                 if (brnestp <= brnest)  /* extra \) */
  513.                                         return(cp = sp, BAD);
  514.                                 *ep++ = CKET;           /* enter end-of-tag */
  515.                                 *ep++ = *--brnestp;     /* pop tag stack */
  516.                                 tags++;                 /* count closed tags */
  517.                                 continue;
  518.                         }
  519.                         else if (c >= '1' && c <= '9')  /* tag use */
  520.                         {
  521.                                 if ((c -= '1') >= tags) /* too few */
  522.                                         return(BAD);
  523.                                 *ep++ = CBACK;          /* enter tag mark */
  524.                                 *ep++ = c;              /* and the number */
  525.                                 continue;
  526.                         }
  527.                         else if (c == '\n')     /* escaped newline no good */
  528.                                 return(cp = sp, BAD);
  529.                         else if (c == 'n')              /* match a newline */
  530.                                 c = '\n';
  531.                         else if (c == 't')              /* match a tab */
  532.                                 c = '\t';
  533.                         /* translated or not, c is matched literally; it must
                                * not fall into the '\0' case, which would drop it */
  534.                         goto defchar;
  535.  
  536.                 case '\0':      /* ignore nuls */
  537.                         continue;
  538.  
  539.                 case '\n':      /* trailing pattern delimiter is missing */
  540.                         return(cp = sp, BAD);
  541.  
  542.                 case '.':       /* match any char except newline */
  543.                         *ep++ = CDOT;
  544.                         continue;
  545.  
  546.                 case '+':       /* 1 to n repeats of previous pattern */
  547.                         if (lastep == NULL)     /* if + has nothing before it */
  548.                                 goto defchar;   /*   match a literal + */
  549.                         if (*lastep == CKET)    /* can't iterate a tag */
  550.                                 return(cp = sp, BAD);
                               /* x+ is compiled as x followed by x*: duplicate
                                * the previous item and star the copy */
  551.                         pp = ep;                /* else save old ep */
  552.                         while (lastep < pp)     /* so we can blt the pattern */
  553.                                 *ep++ = *lastep++;
  554.                         *lastep |= STAR;        /* flag the copy */
  555.                         continue;
  556.  
  557.                 case '*':       /* 0..n repeats of previous pattern */
  558.                         if (lastep == NULL)     /* if * has nothing before it */
  559.                                 goto defchar;   /*   match a literal * */
  560.                         if (*lastep == CKET)    /* can't iterate a tag */
  561.                                 return(cp = sp, BAD);
  562.                         *lastep |= STAR;        /* flag previous pattern */
  563.                         continue;
  564.  
  565.                 case '$':       /* match only end-of-line */
  566.                         if (*sp != redelim)     /* if we're not at end of RE */
  567.                                 goto defchar;   /*   match a literal $ */
  568.                         *ep++ = CDOL;           /* insert end-symbol mark */
  569.                         continue;
  570.  
  571.                 case '[':       /* begin character set pattern */
                               /* a class takes 1 opcode byte + 16 bitmap bytes */
  572.                         if (ep + 17 >= expbuf + RELIMIT)
  573.                                 ABORT(REITL);
  574.                         *ep++ = CCL;            /* insert class mark */
  575.                         if (negclass = ((c = *sp++) == '^'))
  576.                                 c = *sp++;
  577.                         svclass = sp;           /* save ptr to class start */
                               /* NOTE(review): bits are only ORed into the
                                * bitmap, so the 16 bytes at ep are assumed to
                                * be zero beforehand -- confirm pool space is
                                * never reused dirty */
  578.                         do {
  579.                                 if (c == '\0') ABORT(CGMSG);
  580.  
  581.                                 /* handle character ranges */
  582.                                 if (c == '-' && sp > svclass && *sp != ']')
  583.                                         for (c = sp[-2]; c < *sp; c++)
  584.                                                 ep[c >> 3] |= bits[c & 7];
  585.  
  586.                                 /* handle escape sequences in sets */
  587.                                 if (c == '\\')
  588.                                         if ((c = *sp++) == 'n')
  589.                                                 c = '\n';
  590.                                         else if (c == 't')
  591.                                                 c = '\t';
  592.  
  593.                                 /* enter (possibly translated) char in set */
  594.                                 ep[c >> 3] |= bits[c & 7];
  595.                         } while
  596.                                 ((c = *sp++) != ']');
  597.  
  598.                         /* invert the bitmask if all-but was specified */
  599.                         if (negclass)
  600.                                 for(classct = 0; classct < 16; classct++)
  601.                                         ep[classct] ^= 0xFF;
  602.                         ep[0] &= 0xFE;          /* never match ASCII 0 */
  603.                         ep += 16;               /* advance ep past set mask */
  604.                         continue;
  605.  
  606.                 defchar:        /* match literal character */
  607.                 default:        /* which is what we'd do by default */
  608.                         *ep++ = CCHR;           /* insert character mark */
  609.                         *ep++ = c;
  610.                 }
  611.         }
  612. }
  613.  
  614. static int cmdline(cbuf)                /* uses eflag, eargc, cmdf */
  615. /* read next command from -e argument or command file */
  616. register char   *cbuf;
  617. {
  618.         register int    inc;    /* not char because must hold EOF */
  619.  
  620.         cbuf--;                 /* so pre-increment points us at cbuf */
  621.  
  622.         /* e command flag is on */
  623.         if (eflag)
  624.         {
  625.                 register char   *p;     /* ptr to current -e argument */
  626.                 static char     *savep; /* saves previous value of p */
  627.  
  628.                 if (eflag > 0)  /* there are pending -e arguments */
  629.                 {
  630.                         eflag = -1;
  631.                         if (eargc-- <= 0)
  632.                                 exit(2);        /* if no arguments, barf */
  633.  
  634.                         /* else transcribe next e argument into cbuf */
  635.                         p = *++eargv;
  636.                         while(*++cbuf = *p++)
  637.                                 if (*cbuf == '\\')
  638.                                 {
  639.                                         if ((*++cbuf = *p++) == '\0')
  640.                                                 return(savep = NULL, -1);
  641.                                         else
  642.                                                 continue;
  643.                                 }
  644.                                 else if (*cbuf == '\n') /* end of 1 cmd line */
  645.                                 {
  646.                                         *cbuf = '\0';
  647.                                         return(savep = p, 1);
  648.                                         /* we'll be back for the rest... */
  649.                                 }
  650.  
  651.                         /* found end-of-string; can advance to next argument */
  652.                         return(savep = NULL, 1);
  653.                 }
  654.  
  655.                 if ((p = savep) == NULL)
  656.                         return(-1);
  657.  
  658.                 while(*++cbuf = *p++)
  659.                         if (*cbuf == '\\')
  660.                         {
  661.                                 if ((*++cbuf = *p++) == '0')
  662.                                         return(savep = NULL, -1);
  663.                                 else
  664.                                         continue;
  665.                         }
  666.                         else if (*cbuf == '\n')
  667.                         {
  668.                                 *cbuf = '\0';
  669.                                 return(savep = p, 1);
  670.                         }
  671.  
  672.                 return(savep = NULL, 1);
  673.         }
  674.  
  675.         /* if no -e flag read from command file descriptor */
  676.         while((inc = getc(cmdf)) != EOF)                /* get next char */
  677.                 if ((*++cbuf = inc) == '\\')            /* if it's escape */
  678.                         *++cbuf = inc = getc(cmdf);     /* get next char */
  679.                 else if (*cbuf == '\n')                 /* end on newline */
  680.                         return(*cbuf = '\0', 1);        /* cap the string */
  681.  
  682.         return(*++cbuf = '\0', -1);     /* end-of-file, no more chars */
  683. }
  684.  
  685. static char *address(expbuf)            /* uses cp, linenum */
  686. /* expand an address at *cp... into expbuf, return ptr at following char */
  687. register char   *expbuf;
  688. {
  689.         static int      numl = 0;       /* current ind in addr-number table */
  690.         register char   *rcp;           /* temp compile ptr for forwd look */
  691.         long            lno;            /* computed value of numeric address */
  692.  
  693.         if (*cp == '$')                 /* end-of-source address */
  694.         {
  695.                 *expbuf++ = CEND;       /* write symbolic end address */
  696.                 *expbuf++ = CEOF;       /* and the end-of-address mark (!) */
  697.                 cp++;                   /* go to next source character */
  698.                 return(expbuf);         /* we're done */
  699.         }
  700.         if (*cp == '/')                 /* start of regular-expression match */
  701.                 return(recomp(expbuf, *cp++));  /* compile the RE */
  702.  
  703.         rcp = cp; lno = 0;              /* now handle a numeric address */
  704.         while(*rcp >= '0' && *rcp <= '9')       /* collect digits */
  705.                 lno = lno*10 + *rcp++ - '0';    /*  compute their value */
  706.  
  707.         if (rcp > cp)                   /* if we caught a number... */
  708.         {
  709.                 *expbuf++ = CLNUM;      /* put a numeric-address marker */
  710.                 *expbuf++ = numl;       /* and the address table index */
  711.                 linenum[numl++] = lno;  /* and set the table entry */
  712.                 if (numl >= MAXLINES)   /* oh-oh, address table overflow */
  713.                         ABORT(TMLNR);   /*   abort with error message */
  714.                 *expbuf++ = CEOF;       /* write the end-of-address marker */
  715.                 cp = rcp;               /* point compile past the address */
  716.                 return(expbuf);         /* we're done */
  717.         }
  718.  
  719.         return(NULL);           /* no legal address was found */
  720. }
  721.  
  722. static char *gettext(txp)               /* uses global cp */
  723. /* accept multiline input from *cp..., discarding leading whitespace */
  724. register char   *txp;                   /* where to put the text */
  725. {
  726.         register char   *p = cp;        /* this is for speed */
  727.  
  728.         SKIPWS(p);                      /* discard whitespace */
  729.         do {
  730.                 if ((*txp = *p++) == '\\')      /* handle escapes */
  731.                         *txp = *p++;
  732.                 if (*txp == '\0')               /* we're at end of input */
  733.                         return(cp = --p, ++txp);
  734.                 else if (*txp == '\n')          /* also SKIPWS after newline */
  735.                         SKIPWS(p);
  736.         } while
  737.                 (txp++);                /* keep going till we find that nul */
  738. }
  739.  
  740. static label *search(ptr)                       /* uses global lablst */
  741. /* find the label matching *ptr, return NULL if none */
  742. register label  *ptr;
  743. {
  744.         register label  *rp;
  745.         for(rp = lablst; rp < ptr; rp++)
  746.                 if (strcmp(rp->name, ptr->name) == 0)
  747.                         return(rp);
  748.         return(NULL);
  749. }
  750.  
  751. static void resolve()                           /* uses global lablst */
  752. /* write label links into the compiled-command space */
  753. {
  754.         register label          *lptr;
  755.         register sedcmd         *rptr, *trptr;
  756.  
  757.         /* loop through the label table */
  758.         for(lptr = lablst; lptr < lab; lptr++)
  759.                 if (lptr->address == NULL)      /* barf if not defined */
  760.                 {
  761.                         fprintf(stderr, ULABL, lptr->name);
  762.                         exit(2);
  763.                 }
  764.                 else if (lptr->last)            /* if last is non-null */
  765.                 {
  766.                         rptr = lptr->last;              /* chase it */
  767.                         while(trptr = rptr->u.link)     /* resolve refs */
  768.                         {
  769.                                 rptr->u.link = lptr->address;
  770.                                 rptr = trptr;
  771.                         }
  772.                         rptr->u.link = lptr->address;
  773.                 }
  774. }
  775.  
  776. static char *ycomp(ep, delim)
  777. /* compile a y (transliterate) command */
  778. register char   *ep;            /* where to compile to */
  779. char            delim;          /* end delimiter to look for */
  780. {
  781.         register char   c, *tp, *sp;
  782.  
  783.         /* scan the 'from' section for invalid chars */
  784.         for(sp = tp = cp; *tp != delim; tp++)
  785.         {
  786.                 if (*tp == '\\')
  787.                         tp++;
  788.                 if ((*tp == '\n') || (*tp == '\0'))
  789.                         return(BAD);
  790.         }
  791.         tp++;           /* tp now points at first char of 'to' section */
  792.  
  793.         /* now rescan the 'from' section */
  794.         while((c = *sp++ & 0x7F) != delim)
  795.         {
  796.                 if (c == '\\' && *sp == 'n')
  797.                 {
  798.                         sp++;
  799.                         c = '\n';
  800.                 }
  801.                 if ((ep[c] = *tp++) == '\\' && *tp == 'n')
  802.                 {
  803.                         ep[c] = '\n';
  804.                         tp++;
  805.                 }
  806.                 if ((ep[c] == delim) || (ep[c] == '\0'))
  807.                         return(BAD);
  808.         }
  809.  
  810.         if (*tp != delim)       /* 'to', 'from' parts have unequal lengths */
  811.                 return(BAD);
  812.  
  813.         cp = ++tp;                      /* point compile ptr past translit */
  814.  
  815.         for(c = 0; c < 128; c++)        /* fill in self-map entries in table */
  816.                 if (ep[c] == 0)
  817.                         ep[c] = c;
  818.  
  819.         return(ep + 0x80);      /* return first free location past table end */
  820. }
  821.  
  822. /* sedcomp.c ends here */
  823.