diff options
Diffstat (limited to 'src/cmd/9sed.c')
| -rw-r--r-- | src/cmd/9sed.c | 1447 |
1 files changed, 0 insertions, 1447 deletions
diff --git a/src/cmd/9sed.c b/src/cmd/9sed.c deleted file mode 100644 index 182163eb..00000000 --- a/src/cmd/9sed.c +++ /dev/null @@ -1,1447 +0,0 @@ -/* - * sed -- stream editor - * - * - */ -#include <u.h> -#include <libc.h> -#include <bio.h> -#include <regexp.h> - -enum { - DEPTH = 20, /* max nesting depth of {} */ - MAXCMDS = 512, /* max sed commands */ - ADDSIZE = 10000, /* size of add & read buffer */ - MAXADDS = 20, /* max pending adds and reads */ - LBSIZE = 8192, /* input line size */ - LABSIZE = 50, /* max label name size */ - MAXSUB = 10, /* max number of sub reg exp */ - MAXFILES = 120, /* max output files */ -}; - /* An address is a line #, a R.E., "$", a reference to the last - * R.E., or nothing. - */ -typedef struct { - enum { - A_NONE, - A_DOL, - A_LINE, - A_RE, - A_LAST, - }type; - union { - long line; /* Line # */ - Reprog *rp; /* Compiled R.E. */ - } u; -} Addr; - -typedef struct SEDCOM { - Addr ad1; /* optional start address */ - Addr ad2; /* optional end address */ - union { - Reprog *re1; /* compiled R.E. */ - Rune *text; /* added text or file name */ - struct SEDCOM *lb1; /* destination command of branch */ - } u; - Rune *rhs; /* Right-hand side of substitution */ - Biobuf* fcode; /* File ID for read and write */ - char command; /* command code -see below */ - char gfl; /* 'Global' flag for substitutions */ - char pfl; /* 'print' flag for substitutions */ - char active; /* 1 => data between start and end */ - char negfl; /* negation flag */ -} SedCom; - - /* Command Codes for field SedCom.command */ -#define ACOM 01 -#define BCOM 020 -#define CCOM 02 -#define CDCOM 025 -#define CNCOM 022 -#define COCOM 017 -#define CPCOM 023 -#define DCOM 03 -#define ECOM 015 -#define EQCOM 013 -#define FCOM 016 -#define GCOM 027 -#define CGCOM 030 -#define HCOM 031 -#define CHCOM 032 -#define ICOM 04 -#define LCOM 05 -#define NCOM 012 -#define PCOM 010 -#define QCOM 011 -#define RCOM 06 -#define SCOM 07 -#define TCOM 021 -#define WCOM 014 -#define CWCOM 024 -#define YCOM 026 -#define XCOM 033 - - -typedef struct label { /* Label symbol table */ - Rune asc[9]; /* Label name */ - SedCom *chain; - SedCom *address; /* Command associated with label */ -} Label; - -typedef struct FILE_CACHE { /* Data file control block */ - struct FILE_CACHE *next; /* Forward Link */ - char *name; /* Name of file */ -} FileCache; - -SedCom pspace[MAXCMDS]; /* Command storage */ -SedCom *pend = pspace+MAXCMDS; /* End of command storage */ -SedCom *rep = pspace; /* Current fill point */ - -Reprog *lastre = 0; /* Last regular expression */ -Resub subexp[MAXSUB]; /* sub-patterns of pattern match*/ - -Rune addspace[ADDSIZE]; /* Buffer for a, c, & i commands */ -Rune *addend = addspace+ADDSIZE; - -SedCom *abuf[MAXADDS]; /* Queue of pending adds & reads */ -SedCom **aptr = abuf; - -struct { /* Sed program input control block */ - enum PTYPE /* Either on command line or in file */ - { P_ARG, - P_FILE - } type; - union PCTL { /* Pointer to data */ - Biobuf *bp; - char *curr; - } pctl; -} prog; - -Rune genbuf[LBSIZE]; /* Miscellaneous buffer */ - -FileCache *fhead = 0; /* Head of File Cache Chain */ -FileCache *ftail = 0; /* Tail of File Cache Chain */ - -Rune *loc1; /* Start of pattern match */ -Rune *loc2; /* End of pattern match */ -Rune seof; /* Pattern delimiter char */ - -Rune linebuf[LBSIZE+1]; /* Input data buffer */ -Rune *lbend = linebuf+LBSIZE; /* End of buffer */ -Rune *spend = linebuf; /* End of input data */ -Rune *cp; /* Current scan point in linebuf */ - -Rune holdsp[LBSIZE+1]; /* Hold buffer */ -Rune *hend = holdsp+LBSIZE; /* End of hold buffer */ -Rune *hspend = holdsp; /* End of hold data */ - -int nflag; /* Command line flags */ -int gflag; - -int dolflag; /* Set when at true EOF */ -int sflag; /* Set when substitution done */ -int jflag; /* Set when jump required */ -int delflag; /* Delete current line when set */ - -long lnum = 0; /* Input line count */ - -char fname[MAXFILES][40]; /* File name cache */ -Biobuf *fcode[MAXFILES]; /* File ID cache */ -int nfiles = 0; /* Cache fill point */ - -Biobuf fout; /* Output stream */ -Biobuf bstdin; /* Default input */ -Biobuf* f = 0; /* Input data */ - -Label ltab[LABSIZE]; /* Label name symbol table */ -Label *labend = ltab+LABSIZE; /* End of label table */ -Label *lab = ltab+1; /* Current Fill point */ - -int depth = 0; /* {} stack pointer */ - -Rune bad; /* Dummy err ptr reference */ -Rune *badp = &bad; - - -char CGMES[] = "Command garbled: %S"; -char TMMES[] = "Too much text: %S"; -char LTL[] = "Label too long: %S"; -char AD0MES[] = "No addresses allowed: %S"; -char AD1MES[] = "Only one address allowed: %S"; - -void address(Addr *); -void arout(void); -int cmp(char *, char *); -int rcmp(Rune *, Rune *); -void command(SedCom *); -Reprog *compile(void); -Rune *compsub(Rune *, Rune *); -void dechain(void); -void dosub(Rune *); -int ecmp(Rune *, Rune *, int); -void enroll(char *); -void errexit(void); -int executable(SedCom *); -void execute(void); -void fcomp(void); -long getrune(void); -Rune *gline(Rune *); -int match(Reprog *, Rune *); -void newfile(enum PTYPE, char *); -int opendata(void); -Biobuf *open_file(char *); -Rune *place(Rune *, Rune *, Rune *); -void quit(char *, char *); -int rline(Rune *, Rune *); -Label *search(Label *); -int substitute(SedCom *); -char *text(char *); -Rune *stext(Rune *, Rune *); -int ycomp(SedCom *); -char * trans(int c); -void putline(Biobuf *bp, Rune *buf, int n); - -void -main(int argc, char **argv) -{ - int compfl; - - lnum = 0; - Binit(&fout, 1, OWRITE); - fcode[nfiles++] = &fout; - compfl = 0; - - if(argc == 1) - exits(0); - ARGBEGIN{ - case 'n': - nflag++; - continue; - case 'f': - if(argc <= 1) - quit("no pattern-file", 0); - newfile(P_FILE, ARGF()); - fcomp(); - compfl = 1; - continue; - case 'e': - if (argc <= 1) - quit("missing pattern", 0); - newfile(P_ARG, ARGF()); - fcomp(); - compfl = 1; - continue; - case 'g': - gflag++; - continue; - default: - fprint(2, "sed: Unknown flag: %c\n", ARGC()); - continue; - } ARGEND - - if(compfl == 0) { - if (--argc < 0) - quit("missing pattern", 0); - newfile(P_ARG, *argv++); - fcomp(); - } - - if(depth) - quit("Too many {'s", 0); - - ltab[0].address = rep; - - dechain(); - - if(argc <= 0) - enroll(0); /* Add stdin to cache */ - else while(--argc >= 0) { - enroll(*argv++); - } - execute(); - exits(0); -} -void -fcomp(void) -{ - Rune *tp; - SedCom *pt, *pt1; - int i; - Label *lpt; - - static Rune *p = addspace; - static SedCom **cmpend[DEPTH]; /* stack of {} operations */ - - while (rline(linebuf, lbend) >= 0) { - cp = linebuf; -comploop: - while(*cp == ' ' || *cp == '\t') - cp++; - if(*cp == '\0' || *cp == '#') - continue; - if(*cp == ';') { - cp++; - goto comploop; - } - - address(&rep->ad1); - if (rep->ad1.type != A_NONE) { - if (rep->ad1.type == A_LAST) { - if (!lastre) - quit("First RE may not be null", 0); - rep->ad1.type = A_RE; - rep->ad1.u.rp = lastre; - } - if(*cp == ',' || *cp == ';') { - cp++; - address(&rep->ad2); - if (rep->ad2.type == A_LAST) { - rep->ad1.type = A_RE; - rep->ad2.u.rp = lastre; - } - } else - rep->ad2.type = A_NONE; - } - while(*cp == ' ' || *cp == '\t') - cp++; - -swit: - switch(*cp++) { - - default: - quit("Unrecognized command: %S", (char *)linebuf); - - case '!': - rep->negfl = 1; - goto swit; - - case '{': - rep->command = BCOM; - rep->negfl = !(rep->negfl); - cmpend[depth++] = &rep->u.lb1; - if(++rep >= pend) - quit("Too many commands: %S", (char *) linebuf); - if(*cp == '\0') continue; - goto comploop; - - case '}': - if(rep->ad1.type != A_NONE) - quit(AD0MES, (char *) linebuf); - if(--depth < 0) - quit("Too many }'s", 0); - *cmpend[depth] = rep; - if(*cp == 0) continue; - goto comploop; - - case '=': - rep->command = EQCOM; - if(rep->ad2.type != A_NONE) - quit(AD1MES, (char *) linebuf); - break; - - case ':': - if(rep->ad1.type != A_NONE) - quit(AD0MES, (char *) linebuf); - - while(*cp == ' ') - cp++; - tp = lab->asc; - while (*cp && *cp != ';' && *cp != ' ' && *cp != '\t' && *cp != '#') { - *tp++ = *cp++; - if(tp >= &(lab->asc[8])) - quit(LTL, (char *) linebuf); - } - *tp = '\0'; - - if(lpt = search(lab)) { - if(lpt->address) - quit("Duplicate labels: %S", (char *) linebuf); - } else { - lab->chain = 0; - lpt = lab; - if(++lab >= labend) - quit("Too many labels: %S", (char *) linebuf); - } - lpt->address = rep; - if (*cp == '#') - continue; - rep--; /* reuse this slot */ - break; - - case 'a': - rep->command = ACOM; - if(rep->ad2.type != A_NONE) - quit(AD1MES, (char *) linebuf); - if(*cp == '\\') cp++; - if(*cp++ != '\n') - quit(CGMES, (char *) linebuf); - rep->u.text = p; - p = stext(p, addend); - break; - case 'c': - rep->command = CCOM; - if(*cp == '\\') cp++; - if(*cp++ != '\n') - quit(CGMES, (char *) linebuf); - rep->u.text = p; - p = stext(p, addend); - break; - case 'i': - rep->command = ICOM; - if(rep->ad2.type != A_NONE) - quit(AD1MES, (char *) linebuf); - if(*cp == '\\') cp++; - if(*cp++ != '\n') - quit(CGMES, (char *) linebuf); - rep->u.text = p; - p = stext(p, addend); - break; - - case 'g': - rep->command = GCOM; - break; - - case 'G': - rep->command = CGCOM; - break; - - case 'h': - rep->command = HCOM; - break; - - case 'H': - rep->command = CHCOM; - break; - - case 't': - rep->command = TCOM; - goto jtcommon; - - case 'b': - rep->command = BCOM; -jtcommon: - while(*cp == ' ')cp++; - if(*cp == '\0') { - if(pt = ltab[0].chain) { - while(pt1 = pt->u.lb1) - pt = pt1; - pt->u.lb1 = rep; - } else - ltab[0].chain = rep; - break; - } - tp = lab->asc; - while((*tp++ = *cp++)) - if(tp >= &(lab->asc[8])) - quit(LTL, (char *) linebuf); - cp--; - tp[-1] = '\0'; - - if(lpt = search(lab)) { - if(lpt->address) { - rep->u.lb1 = lpt->address; - } else { - pt = lpt->chain; - while(pt1 = pt->u.lb1) - pt = pt1; - pt->u.lb1 = rep; - } - } else { - lab->chain = rep; - lab->address = 0; - if(++lab >= labend) - quit("Too many labels: %S", - (char *) linebuf); - } - break; - - case 'n': - rep->command = NCOM; - break; - - case 'N': - rep->command = CNCOM; - break; - - case 'p': - rep->command = PCOM; - break; - - case 'P': - rep->command = CPCOM; - break; - - case 'r': - rep->command = RCOM; - if(rep->ad2.type != A_NONE) - quit(AD1MES, (char *) linebuf); - if(*cp++ != ' ') - quit(CGMES, (char *) linebuf); - rep->u.text = p; - p = stext(p, addend); - break; - - case 'd': - rep->command = DCOM; - break; - - case 'D': - rep->command = CDCOM; - rep->u.lb1 = pspace; - break; - - case 'q': - rep->command = QCOM; - if(rep->ad2.type != A_NONE) - quit(AD1MES, (char *) linebuf); - break; - - case 'l': - rep->command = LCOM; - break; - - case 's': - rep->command = SCOM; - seof = *cp++; - if ((rep->u.re1 = compile()) == 0) { - if(!lastre) - quit("First RE may not be null.", 0); - rep->u.re1 = lastre; - } - rep->rhs = p; - if((p = compsub(p, addend)) == 0) - quit(CGMES, (char *) linebuf); - if(*cp == 'g') { - cp++; - rep->gfl++; - } else if(gflag) - rep->gfl++; - - if(*cp == 'p') { - cp++; - rep->pfl = 1; - } - - if(*cp == 'P') { - cp++; - rep->pfl = 2; - } - - if(*cp == 'w') { - cp++; - if(*cp++ != ' ') - quit(CGMES, (char *) linebuf); - text(fname[nfiles]); - for(i = nfiles - 1; i >= 0; i--) - if(cmp(fname[nfiles],fname[i]) == 0) { - rep->fcode = fcode[i]; - goto done; - } - if(nfiles >= MAXFILES) - quit("Too many files in w commands 1", 0); - rep->fcode = open_file(fname[nfiles]); - } - break; - - case 'w': - rep->command = WCOM; - if(*cp++ != ' ') - quit(CGMES, (char *) linebuf); - text(fname[nfiles]); - for(i = nfiles - 1; i >= 0; i--) - if(cmp(fname[nfiles], fname[i]) == 0) { - rep->fcode = fcode[i]; - goto done; - } - if(nfiles >= MAXFILES){ - fprint(2, "sed: Too many files in w commands 2 \n"); - fprint(2, "nfiles = %d; MAXF = %d\n", nfiles, MAXFILES); - errexit(); - } - rep->fcode = open_file(fname[nfiles]); - break; - - case 'x': - rep->command = XCOM; - break; - - case 'y': - rep->command = YCOM; - seof = *cp++; - if (ycomp(rep) == 0) - quit(CGMES, (char *) linebuf); - break; - - } -done: - if(++rep >= pend) - quit("Too many commands, last: %S", (char *) linebuf); - - if(*cp++ != '\0') { - if(cp[-1] == ';') - goto comploop; - quit(CGMES, (char *) linebuf); - } - - } -} - -Biobuf * -open_file(char *name) -{ - Biobuf *bp; - int fd; - - if ((bp = malloc(sizeof(Biobuf))) == 0) - quit("Out of memory", 0); - if ((fd = open(name, OWRITE)) < 0 && - (fd = create(name, OWRITE, 0666)) < 0) - quit("Cannot create %s", name); - Binit(bp, fd, OWRITE); - Bseek(bp, 0, 2); - fcode[nfiles++] = bp; - return bp; -} - -Rune * -compsub(Rune *rhs, Rune *end) -{ - Rune r; - - while ((r = *cp++) != '\0') { - if(r == '\\') { - if (rhs < end) - *rhs++ = 0xFFFF; - else - return 0; - r = *cp++; - if(r == 'n') - r = '\n'; - } else { - if(r == seof) { - if (rhs < end) - *rhs++ = '\0'; - else - return 0; - return rhs; - } - } - if (rhs < end) - *rhs++ = r; - else - return 0; - - } - return 0; -} - -Reprog * -compile(void) -{ - Rune c; - char *ep; - char expbuf[512]; - - if((c = *cp++) == seof) /* '//' */ - return 0; - ep = expbuf; - do { - if (c == 0 || c == '\n') - quit(TMMES, (char *) linebuf); - if (c == '\\') { - if (ep >= expbuf+sizeof(expbuf)) - quit(TMMES, (char *) linebuf); - ep += runetochar(ep, &c); - if ((c = *cp++) == 'n') - c = '\n'; - } - if (ep >= expbuf+sizeof(expbuf)) - quit(TMMES, (char *) linebuf); - ep += runetochar(ep, &c); - } while ((c = *cp++) != seof); - *ep = 0; - return lastre = regcomp(expbuf); -} - -void -regerror(char *s) -{ - USED(s); - quit(CGMES, (char *) linebuf); -} - -void -newfile(enum PTYPE type, char *name) -{ - if (type == P_ARG) - prog.pctl.curr = name; - else if ((prog.pctl.bp = Bopen(name, OREAD)) == 0) - quit("Cannot open pattern-file: %s\n", name); - prog.type = type; -} - -int -rline(Rune *buf, Rune *end) -{ - long c; - Rune r; - - while ((c = getrune()) >= 0) { - r = c; - if (r == '\\') { - if (buf <= end) - *buf++ = r; - if ((c = getrune()) < 0) - break; - r = c; - } else if (r == '\n') { - *buf = '\0'; - return(1); - } - if (buf <= end) - *buf++ = r; - } - *buf = '\0'; - return(-1); -} - -long -getrune(void) -{ - char *p; - long c; - Rune r; - - if (prog.type == P_ARG) { - if ((p = prog.pctl.curr) != 0) { - if (*p) { - prog.pctl.curr += chartorune(&r, p); - c = r; - } else { - c = '\n'; /* fake an end-of-line */ - prog.pctl.curr = 0; - } - } else - c = -1; - } else if ((c = Bgetrune(prog.pctl.bp)) < 0) - Bterm(prog.pctl.bp); - return c; -} - -void -address(Addr *ap) -{ - int c; - long lno; - - if((c = *cp++) == '$') - ap->type = A_DOL; - else if(c == '/') { - seof = c; - if (ap->u.rp = compile()) - ap->type = A_RE; - else - ap->type = A_LAST; - } - else if (c >= '0' && c <= '9') { - lno = c-'0'; - while ((c = *cp) >= '0' && c <= '9') - lno = lno*10 + *cp++-'0'; - if(!lno) - quit("line number 0 is illegal",0); - ap->type = A_LINE; - ap->u.line = lno; - } - else { - cp--; - ap->type = A_NONE; - } -} - -int -cmp(char *a, char *b) /* compare characters */ -{ - while(*a == *b++) - if (*a == '\0') - return(0); - else a++; - return(1); -} - -int -rcmp(Rune *a, Rune *b) /* compare runes */ -{ - while(*a == *b++) - if (*a == '\0') - return(0); - else a++; - return(1); -} - -char * -text(char *p) /* extract character string */ -{ - Rune r; - - while(*cp == '\t' || *cp == ' ') - cp++; - while (*cp) { - if ((r = *cp++) == '\\') - if ((r = *cp++) == 0) - break;; - if (r == '\n') - while (*cp == '\t' || *cp == ' ') - cp++; - p += runetochar(p, &r); - } - *p++ = '\0'; - return p; -} - -Rune * -stext(Rune *p, Rune *end) /* extract rune string */ -{ - while(*cp == '\t' || *cp == ' ') - cp++; - while (*cp) { - if (*cp == '\\') - if (*++cp == 0) - break; - if (p >= end-1) - quit(TMMES, (char *) linebuf); - if ((*p++ = *cp++) == '\n') - while(*cp == '\t' || *cp == ' ') - cp++; - } - *p++ = 0; - return p; -} - - -Label * -search (Label *ptr) -{ - Label *rp; - - for (rp = ltab; rp < ptr; rp++) - if(rcmp(rp->asc, ptr->asc) == 0) - return(rp); - return(0); -} - -void -dechain(void) -{ - Label *lptr; - SedCom *rptr, *trptr; - - for(lptr = ltab; lptr < lab; lptr++) { - - if(lptr->address == 0) - quit("Undefined label: %S", (char *) lptr->asc); - - if(lptr->chain) { - rptr = lptr->chain; - while(trptr = rptr->u.lb1) { - rptr->u.lb1 = lptr->address; - rptr = trptr; - } - rptr->u.lb1 = lptr->address; - } - } -} - -int -ycomp(SedCom *r) -{ - int i; - Rune *rp; - Rune c, *tsp, highc; - Rune *sp; - - highc = 0; - for(tsp = cp; *tsp != seof; tsp++) { - if(*tsp == '\\') - tsp++; - if(*tsp == '\n' || *tsp == '\0') - return(0); - if (*tsp > highc) highc = *tsp; - } - tsp++; - if ((rp = r->u.text = (Rune *) malloc(sizeof(Rune)*(highc+2))) == 0) - quit("Out of memory", 0); - *rp++ = highc; /* save upper bound */ - for (i = 0; i <= highc; i++) - rp[i] = i; - sp = cp; - while((c = *sp++) != seof) { - if(c == '\\' && *sp == 'n') { - sp++; - c = '\n'; - } - if((rp[c] = *tsp++) == '\\' && *tsp == 'n') { - rp[c] = '\n'; - tsp++; - } - if(rp[c] == seof || rp[c] == '\0') { - free(r->u.re1); - r->u.re1 = 0; - return(0); - } - } - if(*tsp != seof) { - free(r->u.re1); - r->u.re1 = 0; - return(0); - } - cp = tsp+1; - return(1); -} - -void -execute(void) -{ - SedCom *ipc; - - while (spend = gline(linebuf)){ - for(ipc = pspace; ipc->command; ) { - if (!executable(ipc)) { - ipc++; - continue; - } - command(ipc); - - if(delflag) - break; - if(jflag) { - jflag = 0; - if((ipc = ipc->u.lb1) == 0) - break; - } else - ipc++; - - } - if(!nflag && !delflag) - putline(&fout, linebuf, spend-linebuf); - if(aptr > abuf) { - arout(); - } - delflag = 0; - } -} - /* determine if a statement should be applied to an input line */ -int -executable(SedCom *ipc) -{ - if (ipc->active) { /* Addr1 satisfied - accept until Addr2 */ - if (ipc->active == 1) /* Second line */ - ipc->active = 2; - switch(ipc->ad2.type) { - case A_NONE: /* No second addr; use first */ - ipc->active = 0; - break; - case A_DOL: /* Accept everything */ - return !ipc->negfl; - case A_LINE: /* Line at end of range? */ - if (lnum <= ipc->ad2.u.line) { - if (ipc->ad2.u.line == lnum) - ipc->active = 0; - return !ipc->negfl; - } - ipc->active = 0; /* out of range */ - return ipc->negfl; - case A_RE: /* Check for matching R.E. */ - if (match(ipc->ad2.u.rp, linebuf)) - ipc->active = 0; - return !ipc->negfl; - default: /* internal error */ - quit("Internal error", 0); - } - } - switch (ipc->ad1.type) { /* Check first address */ - case A_NONE: /* Everything matches */ - return !ipc->negfl; - case A_DOL: /* Only last line */ - if (dolflag) - return !ipc->negfl; - break; - case A_LINE: /* Check line number */ - if (ipc->ad1.u.line == lnum) { - ipc->active = 1; /* In range */ - return !ipc->negfl; - } - break; - case A_RE: /* Check R.E. */ - if (match(ipc->ad1.u.rp, linebuf)) { - ipc->active = 1; /* In range */ - return !ipc->negfl; - } - break; - default: - quit("Internal error", 0); - } - return ipc->negfl; -} - -int -match(Reprog *pattern, Rune *buf) -{ - if (!pattern) - return 0; - subexp[0].s.rsp = buf; - subexp[0].e.rep = 0; - if (rregexec(pattern, linebuf, subexp, MAXSUB)) { - loc1 = subexp[0].s.rsp; - loc2 = subexp[0].e.rep; - return 1; - } - loc1 = loc2 = 0; - return 0; -} - -int -substitute(SedCom *ipc) -{ - int len; - - if(!match(ipc->u.re1, linebuf)) - return 0; - - /* - * we have at least one match. some patterns, e.g. '$' or '^', can - * produce zero-length matches, so during a global substitute we - * must bump to the character after a zero-length match to keep from looping. - */ - sflag = 1; - if(ipc->gfl == 0) /* single substitution */ - dosub(ipc->rhs); - else - do{ /* global substitution */ - len = loc2-loc1; /* length of match */ - dosub(ipc->rhs); /* dosub moves loc2 */ - if(*loc2 == 0) /* end of string */ - break; - if(len == 0) /* zero-length R.E. match */ - loc2++; /* bump over zero-length match */ - if(*loc2 == 0) /* end of string */ - break; - } while(match(ipc->u.re1, loc2)); - return 1; -} - -void -dosub(Rune *rhsbuf) -{ - Rune *lp, *sp; - Rune *rp; - int c, n; - - lp = linebuf; - sp = genbuf; - rp = rhsbuf; - while (lp < loc1) - *sp++ = *lp++; - while(c = *rp++) { - if (c == '&') { - sp = place(sp, loc1, loc2); - continue; - } - if (c == 0xFFFF && (c = *rp++) >= '1' && c < MAXSUB+'0') { - n = c-'0'; - if (subexp[n].s.rsp && subexp[n].e.rep) { - sp = place(sp, subexp[n].s.rsp, subexp[n].e.rep); - continue; - } - else { - fprint(2, "sed: Invalid back reference \\%d\n",n); - errexit(); - } - } - *sp++ = c; - if (sp >= &genbuf[LBSIZE]) - fprint(2, "sed: Output line too long.\n"); - } - lp = loc2; - loc2 = sp - genbuf + linebuf; - while (*sp++ = *lp++) - if (sp >= &genbuf[LBSIZE]) - fprint(2, "sed: Output line too long.\n"); - lp = linebuf; - sp = genbuf; - while (*lp++ = *sp++) - ; - spend = lp-1; -} - -Rune * -place(Rune *sp, Rune *l1, Rune *l2) -{ - while (l1 < l2) { - *sp++ = *l1++; - if (sp >= &genbuf[LBSIZE]) - fprint(2, "sed: Output line too long.\n"); - } - return(sp); -} - -char * -trans(int c) -{ - static char buf[] = "\\x0000"; - static char hex[] = "0123456789abcdef"; - - switch(c) { - case '\b': - return "\\b"; - case '\n': - return "\\n"; - case '\r': - return "\\r"; - case '\t': - return "\\t"; - case '\\': - return "\\\\"; - } - buf[2] = hex[(c>>12)&0xF]; - buf[3] = hex[(c>>8)&0xF]; - buf[4] = hex[(c>>4)&0xF]; - buf[5] = hex[c&0xF]; - return buf; -} - -void -command(SedCom *ipc) -{ - int i, c; - Rune *p1, *p2; - char *ucp; - Rune *rp; - Rune *execp; - - switch(ipc->command) { - - case ACOM: - *aptr++ = ipc; - if(aptr >= abuf+MAXADDS) { - quit("sed: Too many appends after line %ld\n", - (char *) lnum); - } - *aptr = 0; - break; - case CCOM: - delflag = 1; - if(ipc->active == 1) { - for(rp = ipc->u.text; *rp; rp++) - Bputrune(&fout, *rp); - Bputc(&fout, '\n'); - } - break; - case DCOM: - delflag++; - break; - case CDCOM: - p1 = p2 = linebuf; - while(*p1 != '\n') { - if(*p1++ == 0) { - delflag++; - return; - } - } - p1++; - while(*p2++ = *p1++) - ; - spend = p2-1; - jflag++; - break; - case EQCOM: - Bprint(&fout, "%ld\n", lnum); - break; - case GCOM: - p1 = linebuf; - p2 = holdsp; - while(*p1++ = *p2++) - ; - spend = p1-1; - break; - case CGCOM: - *spend++ = '\n'; - p1 = spend; - p2 = holdsp; - while(*p1++ = *p2++) - if(p1 >= lbend) - break; - spend = p1-1; - break; - case HCOM: - p1 = holdsp; - p2 = linebuf; - while(*p1++ = *p2++); - hspend = p1-1; - break; - case CHCOM: - *hspend++ = '\n'; - p1 = hspend; - p2 = linebuf; - while(*p1++ = *p2++) - if(p1 >= hend) - break; - hspend = p1-1; - break; - case ICOM: - for(rp = ipc->u.text; *rp; rp++) - Bputrune(&fout, *rp); - Bputc(&fout, '\n'); - break; - case BCOM: - jflag = 1; - break; - case LCOM: - c = 0; - for (i = 0, rp = linebuf; *rp; rp++) { - c = *rp; - if(c >= 0x20 && c < 0x7F && c != '\\') { - Bputc(&fout, c); - if(i++ > 71) { - Bprint(&fout, "\\\n"); - i = 0; - } - } else { - for (ucp = trans(*rp); *ucp; ucp++){ - c = *ucp; - Bputc(&fout, c); - if(i++ > 71) { - Bprint(&fout, "\\\n"); - i = 0; - } - } - } - } - if(c == ' ') - Bprint(&fout, "\\n"); - Bputc(&fout, '\n'); - break; - case NCOM: - if(!nflag) - putline(&fout, linebuf, spend-linebuf); - - if(aptr > abuf) - arout(); - if((execp = gline(linebuf)) == 0) { - delflag = 1; - break; - } - spend = execp; - break; - case CNCOM: - if(aptr > abuf) - arout(); - *spend++ = '\n'; - if((execp = gline(spend)) == 0) { - delflag = 1; - break; - } - spend = execp; - break; - case PCOM: - putline(&fout, linebuf, spend-linebuf); - break; - case CPCOM: - cpcom: - for(rp = linebuf; *rp && *rp != '\n'; rp++) - Bputc(&fout, *rp); - Bputc(&fout, '\n'); - break; - case QCOM: - if(!nflag) - putline(&fout, linebuf, spend-linebuf); - if(aptr > abuf) - arout(); - exits(0); - case RCOM: - *aptr++ = ipc; - if(aptr >= &abuf[MAXADDS]) - quit("sed: Too many reads after line %ld\n", - (char *) lnum); - *aptr = 0; - break; - case SCOM: - i = substitute(ipc); - if(i && ipc->pfl) - if(ipc->pfl == 1) - putline(&fout, linebuf, spend-linebuf); - else - goto cpcom; - if(i && ipc->fcode) - goto wcom; - break; - - case TCOM: - if(sflag == 0) break; - sflag = 0; - jflag = 1; - break; - - wcom: - case WCOM: - putline(ipc->fcode,linebuf, spend-linebuf); - break; - case XCOM: - p1 = linebuf; - p2 = genbuf; - while(*p2++ = *p1++); - p1 = holdsp; - p2 = linebuf; - while(*p2++ = *p1++); - spend = p2 - 1; - p1 = genbuf; - p2 = holdsp; - while(*p2++ = *p1++); - hspend = p2 - 1; - break; - case YCOM: - p1 = linebuf; - p2 = ipc->u.text; - for (i = *p2++; *p1; p1++){ - if (*p1 <= i) *p1 = p2[*p1]; - } - break; - } - -} - -void -putline(Biobuf *bp, Rune *buf, int n) -{ - while (n--) - Bputrune(bp, *buf++); - Bputc(bp, '\n'); -} - -int -ecmp(Rune *a, Rune *b, int count) -{ - while(count--) - if(*a++ != *b++) return(0); - return(1); -} - -void -arout(void) -{ - Rune *p1; - Biobuf *fi; - int c; - char *s; - char buf[128]; - - for (aptr = abuf; *aptr; aptr++) { - if((*aptr)->command == ACOM) { - for(p1 = (*aptr)->u.text; *p1; p1++ ) - Bputrune(&fout, *p1); - Bputc(&fout, '\n'); - } else { - for(s = buf, p1= (*aptr)->u.text; *p1; p1++) - s += runetochar(s, p1); - *s = '\0'; - if((fi = Bopen(buf, OREAD)) == 0) - continue; - while((c = Bgetc(fi)) >= 0) - Bputc(&fout, c); - Bterm(fi); - } - } - aptr = abuf; - *aptr = 0; -} - -void -errexit(void) -{ - exits("error"); -} - -void -quit (char *msg, char *arg) -{ - fprint(2, "sed: "); - fprint(2, msg, arg); - fprint(2, "\n"); - errexit(); -} - -Rune * -gline(Rune *addr) -{ - long c; - Rune *p; - - static long peekc = 0; - - if (f == 0 && opendata() < 0) - return 0; - sflag = 0; - lnum++; -/* Bflush(&fout);********* dumped 4/30/92 - bobf****/ - do { - p = addr; - for (c = (peekc ? peekc : Bgetrune(f)); c >= 0; c = Bgetrune(f)) { - if (c == '\n') { - if ((peekc = Bgetrune(f)) < 0) { - if (fhead == 0) - dolflag = 1; - } - *p = '\0'; - return p; - } - if (c && p < lbend) - *p++ = c; - } - /* return partial final line, adding implicit newline */ - if(p != addr) { - *p = '\0'; - peekc = -1; - if (fhead == 0) - dolflag = 1; - return p; - } - peekc = 0; - Bterm(f); - } while (opendata() > 0); /* Switch to next stream */ - f = 0; - return 0; -} - - /* Data file input section - the intent is to transparently - * catenate all data input streams. - */ -void -enroll(char *filename) /* Add a file to the input file cache */ -{ - FileCache *fp; - - if ((fp = (FileCache *) malloc(sizeof (FileCache))) == 0) - quit("Out of memory", 0); - if (ftail == 0) - fhead = fp; - else - ftail->next = fp; - ftail = fp; - fp->next = 0; - fp->name = filename; /* 0 => stdin */ -} - -int -opendata(void) -{ - if (fhead == 0) - return -1; - if (fhead->name) { - if ((f = Bopen(fhead->name, OREAD)) == 0) - quit("Can't open %s", fhead->name); - } else { - Binit(&bstdin, 0, OREAD); - f = &bstdin; - } - fhead = fhead->next; - return 1; -} |
