This repository has been archived by the owner on Feb 13, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsedcomp.c
957 lines (849 loc) · 26.5 KB
/
sedcomp.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
/* sedcomp.c -- stream editor main and compilation phase
Copyright (C) 1995-2003 Eric S. Raymond
Copyright (C) 2004-2014 Rene Rebe
Copyright (C) 2015 Olli Vanhoja
The stream editor compiles its command input (from files or -e options)
into an internal form using compile() then executes the compiled form using
execute(). Main() just initializes data structures, interprets command line
options, and calls compile() and execute() in appropriate sequence.
The data structure produced by compile() is an array of compiled-command
structures (type sedcmd). These contain several pointers into pool[], the
regular-expression and text-data pool, plus a command code and g & p flags.
In the special case that the command is a label the struct will hold a ptr
into the labels array labels[] during most of the compile, until resolve()
resolves references at the end.
The operation of execute() is described in its source module.
*/
#include <stdlib.h> /* exit */
#include <stdio.h> /* uses getc, fprintf, fopen, fclose */
#include <ctype.h> /* isdigit */
#include <string.h> /* strcmp */
#include "sed.h" /* command type struct and name defines */
/***** public stuff ******/
#define MAXCMDS 200 /* maximum number of compiled commands */
#define MAXLINES 256 /* max # numeric addresses to compile */
/* main data areas */
char linebuf[MAXBUF+1]; /* current-line buffer */
sedcmd cmds[MAXCMDS+1]; /* hold compiled commands */
long linenum[MAXLINES]; /* numeric-addresses table */
/* miscellaneous shared variables */
int nflag; /* -n option flag */
int eargc; /* scratch copy of argument count */
sedcmd *pending = NULL; /* next command to be executed */
int last_line_used = 0; /* last line address ($) was used */
/* Print a fatal diagnostic on stderr and terminate with exit status 2.
   msg: printf-style format; the current line buffer (linebuf) is supplied
        as its only argument, so msg may use at most one %s conversion. */
void die (const char* msg) {
    fputs("sed: ", stderr);          /* common prefix for all diagnostics */
    fprintf(stderr, msg, linebuf);   /* message body, may quote linebuf */
    fputc('\n', stderr);
    exit(2);
}
/***** module common stuff *****/
#define POOLSIZE 10000 /* size of string-pool space */
#define WFILES 10 /* max # w output files that can be compiled */
#define RELIMIT 256 /* max chars in compiled RE */
#define MAXDEPTH 20 /* maximum {}-nesting level */
#define MAXLABS 50 /* max # of labels that can be handled */
#define SKIPWS(pc) while ((*pc==' ') || (*pc=='\t')) pc++
#define IFEQ(x, v) if (*x == v) x++ , /* do expression */
/* error messages */
static const char AGMSG[] = "garbled address %s";
static const char CGMSG[] = "garbled command %s";
static const char TMTXT[] = "too much text: %s";
static const char AD1NG[] = "no addresses allowed for %s";
static const char AD2NG[] = "only one address allowed for %s";
static const char TMCDS[] = "too many commands, last was %s";
static const char COCFI[] = "cannot open command-file %s";
static const char UFLAG[] = "unknown flag %c";
static const char CCOFI[] = "cannot create %s";
static const char ULABL[] = "undefined label %s";
static const char TMLBR[] = "too many {'s";
static const char FRENL[] = "first RE must be non-null";
static const char NSCAX[] = "no such command as %s";
static const char TMRBR[] = "too many }'s";
static const char DLABL[] = "duplicate label %s";
static const char TMLAB[] = "too many labels: %s";
static const char TMWFI[] = "too many w files";
static const char REITL[] = "RE too long: %s";
static const char TMLNR[] = "too many line numbers";
static const char TRAIL[] = "command \"%s\" has trailing garbage";
static const char RETER[] = "RE not terminated: %s";
static const char CCERR[] = "unknown character class: %s";
/* Character-class table used for [[:name:]] inside bracket expressions.
   Entries come in pairs: the class name, then the set it expands to.  In
   the set string "x-y" denotes the inclusive range x..y and any other
   character stands for itself.  The table ends with a NULL/NULL pair.
   Note: cntrl is 0x01-0x1f plus DEL (0x7f); 0x7e ('~') is printable. */
static const char* cclasses[] = {
    "alnum",  "a-zA-Z0-9",
    "lower",  "a-z",
    "space",  " \f\n\r\t\v",
    "alpha",  "a-zA-Z",
    "digit",  "0-9",
    "upper",  "A-Z",
    "blank",  " \t",
    "xdigit", "0-9A-Fa-f",
    "cntrl",  "\x01-\x1f\x7f",
    "print",  " -\x7e",
    "graph",  "!-\x7e",
    "punct",  "!-/:-@[-`{-\x7e",
    NULL, NULL};
/* One entry of the label table (labels[]).  An entry is created either by a
   label declaration (":name") or by the first branch that mentions a name
   that has not been declared yet. */
typedef struct /* represent a command label */
{
char *name; /* label text, stored in the string pool; NULL for the table header entry */
sedcmd *last; /* first branch command waiting on this label; further ones are chained through u.link */
sedcmd *address; /* command the label marks, or NULL while the label is still undeclared */
} label;
/* label handling */
static label labels[MAXLABS]; /* here's the label table */
static label *lab = labels + 1; /* pointer to current label */
static label *lablst = labels; /* header for search list */
/* string pool for regular expressions, append text, etc. etc. */
static char pool[POOLSIZE]; /* the pool */
static char *fp = pool; /* current pool pointer */
static char *poolend = pool + POOLSIZE; /* pointer past pool end */
/* compilation state */
static FILE *cmdf = NULL; /* current command source */
static char *cp = linebuf; /* compile pointer */
static sedcmd *cmdp = cmds; /* current compiled-cmd ptr */
static char *lastre = NULL; /* old RE pointer */
static int bdepth = 0; /* current {}-nesting level */
static int bcount = 0; /* # tagged patterns in current RE */
static char **eargv; /* scratch copy of argument list */
/* compilation flags */
static int eflag; /* -e option flag */
static int gflag; /* -g option flag */
/* prototypes */
static char *address(char *expbuf);
static char *gettext(char* txp);
static char *recomp(char *expbuf, char redelim);
static char *rhscomp(char* rhsp, char delim);
static char *ycomp(char *ep, char delim);
static int cmdcomp(char cchar);
static int cmdline(char *cbuf);
static label *search(label *ptr);
static void compile(void);
static void resolve(void);
/* sedexec.c prototypes */
void execute(char* file);
/* Main sequence of the stream editor.
   Interprets the command line, compiles every script given with -e or -f
   (or the first non-option argument when neither was used), resolves label
   references and then runs execute() on each remaining file argument, or on
   standard input when there is none.
   Exit status: 0 on success, 2 on a usage or compilation error. */
int main(int argc, char *argv[])
{
    eargc = argc;           /* set local copy of argument count */
    eargv = argv;           /* set local copy of argument list */
    cmdp->addr1 = pool;     /* 1st addr expand will be at pool start */

    if (eargc == 1)
        exit(0);            /* exit immediately if no arguments */

    /* scan through the arguments, interpreting each one; inside the loop
       eargc counts the current argument plus everything after it */
    while ((--eargc > 0) && (**++eargv == '-'))
        switch (eargv[0][1])
        {
        case 'e':
            eflag++; compile();     /* compile with e flag on */
            eflag = 0;
            continue;               /* get another argument */

        case 'f':
            /* -f needs a following file name; without this test a trailing
               -f would hand argv[argc] (NULL) to fopen() */
            if (--eargc <= 0)
            {
                fputs("sed: option -f requires a command-file argument\n",
                      stderr);
                exit(2);
            }
            if ((cmdf = fopen(*++eargv, "r")) == NULL)
            {
                fputs("sed: ", stderr);
                fprintf(stderr, COCFI, *eargv);
                fputc('\n', stderr);
                exit(2);
            }
            compile();              /* file is O.K., compile it */
            fclose(cmdf);
            continue;               /* go back for another argument */

        case 'g':
            gflag++;                /* set global flag on all s cmds */
            continue;

        case 'n':
            nflag++;                /* no print except on p flag or w */
            continue;

        default:
            /* diagnostics belong on stderr, not in the edited output */
            fputs("sed: ", stderr);
            fprintf(stderr, UFLAG, eargv[0][1]);
            fputc('\n', stderr);
            continue;
        }

    if (cmdp == cmds)       /* no commands have been compiled */
    {
        /* step back one argument so that cmdline() picks up the first
           non-option argument as if it had been given with -e */
        eargv--; eargc++;
        eflag++; compile(); eflag = 0;
        eargv++; eargc--;
    }

    if (bdepth)             /* we have unbalanced squigglies */
        die(TMLBR);

    lablst->address = cmdp; /* set up header of label linked list */
    resolve();              /* resolve label table indirections */

    if (eargc <= 0)         /* no file arguments are left */
        execute(NULL);      /* process standard input only */
    else
        while (--eargc >= 0)    /* else process each named file */
            execute(*eargv++);

    exit(0);                /* everything was O.K. if we got here */
}
#define H 0x80 /* 128 bit, on if there's really code for command */
#define LOWCMD 56 /* = '8', lowest char indexed in cmdmask */
/* Command lookup table, indexed by (command character - LOWCMD).
   It covers the 72 characters from '8' (56) up to DEL (127), eight per row.
   An entry of 0 means "no such command".  Otherwise the low seven bits hold
   the internal command code and the H bit says that cmdcomp() must be
   called to compile the rest of the command. */
static char cmdmask[] =
{
/* 8 9 : ; < = > ? */
0, 0, H, 0, 0, H+EQCMD,0, 0,
/* @ A B C D E F G */
0, 0, 0, 0, H+CDCMD,0, 0, CGCMD,
/* H I J K L M N O */
CHCMD, 0, 0, 0, H+CLCMD,0, CNCMD, 0,
/* P Q R S T U V W */
CPCMD, 0, 0, 0, H+CTCMD,0, 0, H+CWCMD,
/* X Y Z [ \ ] ^ _  (no commands in this row) */
0, 0, 0, 0, 0, 0, 0, 0,
/* ` a b c d e f g */
0, H+ACMD, H+BCMD, H+CCMD, DCMD, 0, 0, GCMD,
/* h i j k l m n o */
HCMD, H+ICMD, 0, 0, H+LCMD, 0, NCMD, 0,
/* p q r s t u v w */
PCMD, H+QCMD, H+RCMD, H+SCMD, H+TCMD, 0, 0, H+WCMD,
/* x y z { | } ~ DEL */
XCMD, H+YCMD, 0, H+BCMD, 0, H, 0, 0,
};
/* Compile sed commands from the current source into cmds[].
   The source is whatever cmdline() delivers: the next -e argument when
   eflag is set, otherwise the open command file cmdf.  Each pass of the
   loop parses one command at cp: optional first and second address, an
   optional '!', then the command character, which is looked up in cmdmask.
   Commands whose table entry has the H bit are finished by cmdcomp().
   The function returns when cmdline() reports that the source is used up;
   every error ends the program through die(). */
static void compile(void)
{
char ccode;
for(;;) /* main compilation loop */
{
SKIPWS(cp);
/* a ';' left over from the previous command is skipped here */
if (*cp == ';') {
cp++;
SKIPWS(cp);
}
if (*cp == '\0' || *cp == '#') /* get a new command line */
if (cmdline(cp = linebuf) < 0)
break;
SKIPWS(cp);
if (*cp == '\0' || *cp == '#') /* a comment */
continue;
/* compile first address */
if (fp > poolend)
die(TMTXT);
else if ((fp = address(cmdp->addr1 = fp)) == BAD)
die(AGMSG);
/* address() returned its argument unchanged: the RE was empty */
if (fp == cmdp->addr1) /* if empty RE was found */
{
if (lastre) /* if there was previous RE */
cmdp->addr1 = lastre; /* use it */
else
die(FRENL);
}
else if (fp == NULL) /* address() found no address at all */
{
fp = cmdp->addr1; /* use current pool location */
cmdp->addr1 = NULL;
}
else
{
lastre = cmdp->addr1;
if (*cp == ',' || *cp == ';') /* there's 2nd addr */
{
cp++;
if (fp > poolend) die(TMTXT);
fp = address(cmdp->addr2 = fp);
if (fp == BAD || fp == NULL) die(AGMSG);
if (fp == cmdp->addr2)
cmdp->addr2 = lastre;
else
lastre = cmdp->addr2;
}
else
cmdp->addr2 = NULL; /* no 2nd address */
}
if (fp > poolend) die(TMTXT);
SKIPWS(cp); /* discard whitespace after address */
/* '!' applies the command to the lines the address does not select */
if (*cp == '!') {
cmdp->flags.allbut = 1;
cp++; SKIPWS(cp);
}
/* get cmd char, range-check it */
if ((*cp < LOWCMD) || (*cp > '~')
|| ((ccode = cmdmask[*cp - LOWCMD]) == 0))
die(NSCAX);
cmdp->command = ccode & ~H; /* fill in command value */
if ((ccode & H) == 0) /* if no compile-time code */
cp++; /* discard command char */
else if (cmdcomp(*cp++)) /* execute it; if ret = 1 */
continue; /* skip next line read */
/* the command slot is complete; move on and check the table limit */
if (++cmdp >= cmds + MAXCMDS) die(TMCDS);
SKIPWS(cp); /* look for trailing stuff */
if (*cp != '\0')
{
if (*cp == ';')
{
continue;
}
else if (*cp != '#' && *cp != '}')
die(TRAIL);
}
}
}
/* compile a single command */
static int cmdcomp(char cchar)
{
static sedcmd **cmpstk[MAXDEPTH]; /* current cmd stack for {} */
static const char *fname[WFILES]; /* w file name pointers */
static FILE *fout[WFILES]; /* w file file ptrs */
static int nwfiles = 2; /* count of open w files */
int i; /* indexing dummy used in w */
sedcmd *sp1, *sp2; /* temps for label searches */
label *lpt; /* ditto, and the searcher */
char redelim = 0; /* current RE delimiter */
fout[0] = stdout;
fout[1] = stderr;
fname[0] = "/dev/stdout";
fname[1] = "/dev/stderr";
switch(cchar)
{
case '{': /* start command group */
cmdp->flags.allbut = !cmdp->flags.allbut;
cmpstk[bdepth++] = &(cmdp->u.link);
if (++cmdp >= cmds + MAXCMDS) die(TMCDS);
if (*cp == '\0') *cp++ = ';', *cp = '\0'; /* get next cmd w/o lineread */
return 1;
case '}': /* end command group */
if (cmdp->addr1) die(AD1NG); /* no addresses allowed */
if (--bdepth < 0) die(TMRBR); /* too many right braces */
*cmpstk[bdepth] = cmdp; /* set the jump address */
return 1;
case '=': /* print current source line number */
case 'q': /* exit the stream editor */
if (cmdp->addr2) die(AD2NG);
break;
case ':': /* label declaration */
if (cmdp->addr1) die(AD1NG); /* no addresses allowed */
fp = gettext(lab->name = fp); /* get the label name */
if ((lpt = search(lab))) /* does it have a double? */
{
if (lpt->address) die(DLABL); /* yes, abort */
}
else /* check that it doesn't overflow label table */
{
lab->last = NULL;
lpt = lab;
if (++lab >= labels + MAXLABS) die(TMLAB);
}
lpt->address = cmdp;
return 1;
case 'b': /* branch command */
case 't': /* branch-on-succeed command */
case 'T': /* branch-on-fail command */
SKIPWS(cp);
if (*cp == '\0') /* if branch is to start of cmds... */
{
/* add current command to end of label last */
if ((sp1 = lablst->last))
{
while((sp2 = sp1->u.link))
sp1 = sp2;
sp1->u.link = cmdp;
}
else /* lablst->last == NULL */
lablst->last = cmdp;
break;
}
fp = gettext(lab->name = fp); /* else get label into pool */
if ((lpt = search(lab))) /* enter branch to it */
{
if (lpt->address)
cmdp->u.link = lpt->address;
else
{
sp1 = lpt->last;
while((sp2 = sp1->u.link))
sp1 = sp2;
sp1->u.link = cmdp;
}
}
else /* matching named label not found */
{
lab->last = cmdp; /* add the new label */
lab->address = NULL; /* it's forward of here */
if (++lab >= labels + MAXLABS) /* overflow if last */
die(TMLAB);
}
break;
case 'a': /* append text */
case 'i': /* insert text */
case 'r': /* read file into stream */
if (cmdp->addr2) die(AD2NG);
case 'c': /* change text */
if ((*cp == '\\') && (*++cp == '\n')) cp++;
fp = gettext(cmdp->u.lhs = fp);
break;
case 'D': /* delete current line in hold space */
cmdp->u.link = cmds;
break;
case 's': /* substitute regular expression */
if (*cp == 0) /* get delimiter from 1st ch */
die(RETER);
else
redelim = *cp++;
if ((fp = recomp(cmdp->u.lhs = fp, redelim)) == BAD)
die(CGMSG);
if (fp == cmdp->u.lhs) { /* if compiled RE zero len */
if (lastre) {
cmdp->u.lhs = lastre; /* use the previous one */
cp++; /* skip delim */
}
else
die(FRENL);
}
else /* otherwise */
lastre = cmdp->u.lhs; /* save the one just found */
if ((cmdp->rhs = fp) > poolend) die(TMTXT);
if ((fp = rhscomp(cmdp->rhs, redelim)) == BAD) die(CGMSG);
if (gflag) cmdp->flags.global++;
while (*cp == 'g' || *cp == 'p' || *cp == 'P' || isdigit(*cp))
{
IFEQ(cp, 'g') cmdp->flags.global++;
IFEQ(cp, 'p') cmdp->flags.print = 1;
IFEQ(cp, 'P') cmdp->flags.print = 2;
if (isdigit(*cp))
{
if (cmdp->nth)
break; /* no multiple n args */
cmdp->nth = atoi(cp); /* check 0? */
while (isdigit(*cp)) cp++;
}
}
case 'l': /* list pattern space */
case 'L': /* dump pattern space */
if (*cp == 'w')
cp++; /* and execute a w command! */
else
break; /* s or L or l is done */
case 'w': /* write-pattern-space command */
case 'W': /* write-first-line command */
if (nwfiles >= WFILES) die(TMWFI);
fname[nwfiles] = fp;
fp = gettext((fname[nwfiles] = fp, fp)); /* filename will be in pool */
for(i = nwfiles-1; i >= 0; i--) /* match it in table */
if (strcmp(fname[nwfiles], fname[i]) == 0)
{
cmdp->fout = fout[i];
return 0;
}
/* if didn't find one, open new out file */
if ((cmdp->fout = fopen(fname[nwfiles], "w")) == NULL)
{
fprintf(stderr, CCOFI, fname[nwfiles]);
exit(2);
}
fout[nwfiles++] = cmdp->fout;
break;
case 'y': /* transliterate text */
fp = ycomp(cmdp->u.lhs = fp, *cp++); /* compile translit */
if (fp == BAD) die(CGMSG); /* fail on bad form */
if (fp > poolend) die(TMTXT); /* fail on overflow */
break;
}
return 0; /* succeeded in interpreting one command */
}
/* Generate the replacement string for the right-hand side of an s command.
   rhsp:  place in the pool to compile the replacement to
   delim: end-mark of the replacement to look for
   Reads the source at cp.  Back references (\0..\9 and '&', which is
   treated as \0) are stored as the digit with bit 0x80 set; \n, \r and \t
   become newline, return and tab; any other escaped character is stored
   literally.  The result is NUL-terminated.
   Returns a pointer just past the stored string and moves cp past the
   delimiter, or returns BAD when the input ends before the delimiter, when
   it ends right after a backslash, or when a back reference is greater
   than bcount. */
static char *rhscomp(char* rhsp, char delim)    /* uses bcount */
{
    char *p = cp;

    for (;;)
    {
        char c = *p++;

        if (c == '\\')                  /* back reference or escape */
        {
            if (*p == '\0')             /* nothing follows the backslash */
                return BAD;
            if (*p < '0' || *p > '9')   /* plain escape */
            {
                switch (*p)
                {
                case 'n': c = '\n'; break;
                case 'r': c = '\r'; break;
                case 't': c = '\t'; break;
                default:  c = *p;   break;
                }
                p++;
                *rhsp++ = c;
                continue;
            }
            c = *p++;                   /* digit of the back reference */
        }
        else if (c == delim)            /* found the end, hooray... */
        {
            *rhsp++ = '\0';             /* cap the expression string */
            cp = p;
            return rhsp;                /* pt at 1 past the string */
        }
        else if (c == '&')              /* whole match, same as \0 */
        {
            c = '0';
        }
        else                            /* ordinary character */
        {
            if (c == '\0')              /* last ch not the end, help! */
                return BAD;
            *rhsp++ = c;
            continue;
        }

        /* back reference: check validity of the pattern tag */
        if (c > bcount + '0')
            return BAD;
        *rhsp++ = (char)(c | 0x80);     /* mark the good ones */
    }
}
/* Compile a regular expression to internal form.
   expbuf:  place to compile it to
   redelim: RE end-marker to look for
   Reads the source at cp.  If the very first character is the delimiter
   nothing is written, cp is left alone and expbuf itself is returned, which
   callers treat as "empty RE".  Otherwise the first output byte records
   whether the RE started with '^', the compiled items follow, and CEOF ends
   the pattern; the return value points just past CEOF and cp just past the
   closing delimiter.  Returns BAD on a malformed or oversized RE; an
   unterminated RE, a bad character class and an unterminated set end the
   program through die().  Sets bcount to the number of \( seen. */
static char *recomp(char *expbuf, char redelim)     /* uses cp, bcount */
{
    register char *ep = expbuf;     /* current-compiled-char pointer */
    register char *sp = cp;         /* source-character ptr */
    register int c;                 /* current source character */
    char negclass;                  /* all-but flag */
    char *lastep;                   /* ptr to last expr compiled */
    char *lastep2;                  /* ditto, but from the last loop */
    char *svclass;                  /* start of current char class */
    char brnest[MAXTAGS];           /* bracket-nesting array */
    char *brnestp;                  /* ptr to current bracket-nest */
    char *pp;                       /* scratch pointer */
    int classct;                    /* class element count */
    int tags;                       /* # of closed tags */

    if (*cp == redelim)             /* if first char is RE endmarker */
        return ep;

    lastep = lastep2 = NULL;        /* there's no previous RE */
    brnestp = brnest;               /* initialize ptr to brnest array */
    tags = bcount = 0;              /* initialize counters */

    if ((*ep++ = (*sp == '^')))     /* check for start-of-line syntax */
        sp++;

    for (;;)
    {
        if (*sp == 0)               /* no termination */
            die (RETER);
        if (ep >= expbuf + RELIMIT) /* match is too large */
            return cp = sp, BAD;
        if ((c = *sp++) == redelim) /* found the end of the RE */
        {
            cp = sp;
            if (brnestp != brnest)  /* \(, \) unbalanced */
                return BAD;
            *ep++ = CEOF;           /* write end-of-pattern mark */
            return ep;              /* return ptr to compiled RE */
        }

        /* lastep is the item compiled by the previous pass, i.e. the one a
           following '*' or '\+' applies to */
        lastep = lastep2;
        lastep2 = ep;

        switch (c)
        {
        case '\\':
            if ((c = *sp++) == '(')     /* start tagged section */
            {
                if (bcount >= MAXTAGS)
                    return cp = sp, BAD;
                *brnestp++ = bcount;    /* update tag stack */
                *ep++ = CBRA;           /* enter tag-start */
                *ep++ = bcount++;       /* bump tag count */
                lastep2 = NULL;
                continue;
            }
            else if (c == ')')          /* end tagged section */
            {
                if (brnestp <= brnest)  /* extra \) */
                    return cp = sp, BAD;
                *ep++ = CKET;           /* enter end-of-tag */
                *ep++ = *--brnestp;     /* pop tag stack */
                tags++;                 /* count closed tags */
                for (lastep2 = ep-1; *lastep2 != CBRA; )
                    --lastep2;          /* FIXME: lastep becomes start */
                continue;
            }
            else if (c >= '1' && c <= '9' && c != redelim)  /* tag use, if !delim */
            {
                if ((c -= '1') >= tags) /* too few */
                    return BAD;
                *ep++ = CBACK;          /* enter tag mark */
                *ep++ = c;              /* and the number */
                continue;
            }
            else if (c == '\n')         /* escaped newline no good */
                return cp = sp, BAD;
            else if (c == 'n')          /* match a newline */
                c = '\n';
            else if (c == 't')          /* match a tab */
                c = '\t';
            else if (c == 'r')          /* match a return */
                c = '\r';
            else if (c == '+')          /* 1..n repeat of previous pattern */
            {
                if (lastep == NULL)     /* nothing precedes the \+ */
                    goto defchar;       /* match a literal + */
                pp = ep;                /* else save old ep */
                *ep++ = *lastep++ | STAR;   /* flag the copy */
                while (lastep < pp)     /* so we can blt the pattern */
                    *ep++ = *lastep++;
                lastep2 = lastep;       /* no new expression */
                continue;
            }
            goto defchar;               /* else match \c */

        case '\0':                      /* ignore nuls */
            continue;

        case '\n':                      /* trailing pattern delimiter is missing */
            return cp = sp, BAD;

        case '.':                       /* match any char except newline */
            *ep++ = CDOT;
            continue;

        case '*':                       /* 0..n repeat of previous pattern */
            if (lastep == NULL)         /* nothing precedes the * */
                goto defchar;           /* match a literal * */
            *lastep |= STAR;            /* flag previous pattern */
            lastep2 = lastep;           /* no new expression */
            continue;

        case '$':                       /* match only end-of-line */
            if (*sp != redelim)         /* if we're not at end of RE */
                goto defchar;           /* match a literal $ */
            *ep++ = CDOL;               /* insert end-symbol mark */
            continue;

        case '[':                       /* begin character set pattern */
            /* a set needs the CCL mark plus a 16-byte bit mask */
            if (ep + 17 >= expbuf + RELIMIT)
                die(REITL);
            *ep++ = CCL;                /* insert class mark */
            if ((negclass = ((c = *sp++) == '^')))
                c = *sp++;
            svclass = sp;               /* save ptr to class start */
            do {
                if (c == '\0') die(CGMSG);

                /* handle predefined character classes */
                if (c == '[' && *sp == ':')
                {
                    /* look for the matching ":]]" */
                    char *p;
                    const char *p2;
                    for (p = sp+3; *p; p++)
                        if (*p == ']' &&
                            *(p-1) == ']' &&
                            *(p-2) == ':')
                        {
                            char cc[8];
                            size_t len = 0;
                            const char **it;

                            /* copy the class name; one byte of cc stays
                               reserved for the terminator, so an over-long
                               name is cut short and then fails the lookup */
                            for (p2 = sp+1;
                                 p2 < p-2 && len < sizeof(cc) - 1;
                                 p2++)
                                cc[len++] = *p2;
                            cc[len] = '\0';     /* termination */

                            it = cclasses;
                            while (*it && strcmp(*it, cc))
                                it += 2;
                            if (!*it++)
                                die(CCERR);

                            /* generate mask */
                            p2 = *it;
                            while (*p2) {
                                if (p2[1] == '-' && p2[2]) {
                                    for (c = *p2; c <= p2[2]; c++)
                                        ep[c >> 3] |= bits(c & 7);
                                    p2 += 3;
                                }
                                else {
                                    c = *p2++;
                                    ep[c >> 3] |= bits(c & 7);
                                }
                            }
                            /* resume at the set's closing ']' with nothing
                               further to enter for this pass */
                            sp = p; c = 0; break;
                        }
                }

                /* handle character ranges */
                if (c == '-' && sp > svclass && *sp != ']')
                    for (c = sp[-2]; c < *sp; c++)
                        ep[c >> 3] |= bits(c & 7);

                /* handle escape sequences in sets */
                if (c == '\\')
                {
                    if ((c = *sp++) == 'n')
                        c = '\n';
                    else if (c == 't')
                        c = '\t';
                    else if (c == 'r')
                        c = '\r';
                }

                /* enter (possibly translated) char in set */
                if (c)
                    ep[c >> 3] |= bits(c & 7);
            } while
                ((c = *sp++) != ']');

            /* invert the bitmask if all-but was specified */
            if (negclass)
                for (classct = 0; classct < 16; classct++)
                    ep[classct] ^= 0xFF;
            ep[0] &= 0xFE;              /* never match ASCII 0 */
            ep += 16;                   /* advance ep past set mask */
            continue;

        defchar:                        /* match literal character */
        default:                        /* which is what we'd do by default */
            *ep++ = CCHR;               /* insert character mark */
            *ep++ = c;
        }
    }
}
/* read next command from -e argument or command file */
static int cmdline(char *cbuf) /* uses eflag, eargc, cmdf */
{
register int inc; /* not char because must hold EOF */
cbuf--; /* so pre-increment points us at cbuf */
/* e command flag is on */
if (eflag)
{
register char *p; /* ptr to current -e argument */
static char *savep; /* saves previous value of p */
if (eflag > 0) /* there are pending -e arguments */
{
eflag = -1;
if (eargc-- <= 0)
exit(2); /* if no arguments, barf */
/* else transcribe next e argument into cbuf */
p = *++eargv;
while((*++cbuf = *p++))
if (*cbuf == '\\')
{
if ((*++cbuf = *p++) == '\0')
return savep = NULL, -1;
else
continue;
}
else if (*cbuf == '\n') /* end of 1 cmd line */
{
*cbuf = '\0';
return savep = p, 1;
/* we'll be back for the rest... */
}
/* found end-of-string; can advance to next argument */
return savep = NULL, 1;
}
if ((p = savep) == NULL)
return -1;
while((*++cbuf = *p++))
if (*cbuf == '\\')
{
if ((*++cbuf = *p++) == '0')
return savep = NULL, -1;
else
continue;
}
else if (*cbuf == '\n')
{
*cbuf = '\0';
return savep = p, 1;
}
return savep = NULL, 1;
}
/* if no -e flag read from command file descriptor */
while((inc = getc(cmdf)) != EOF) /* get next char */
if ((*++cbuf = inc) == '\\') /* if it's escape */
*++cbuf = inc = getc(cmdf); /* get next char */
else if (*cbuf == '\n') /* end on newline */
return *cbuf = '\0', 1; /* cap the string */
return *++cbuf = '\0', -1; /* end-of-file, no more chars */
}
/* expand an address at *cp... into expbuf, return ptr at following char */
static char *address(char *expbuf) /* uses cp, linenum */
{
register char *rcp; /* temp compile ptr for forwd look */
long lno; /* computed value of numeric address */
if (*cp == '$') /* end-of-source address */
{
*expbuf++ = CEND; /* write symbolic end address */
*expbuf++ = CEOF; /* and the end-of-address mark (!) */
cp++; /* go to next source character */
last_line_used = TRUE;
return expbuf; /* we're done */
}
if (*cp == '/') /* start of regular-expression match */
return recomp(expbuf, *cp++); /* compile the RE */
rcp = cp; lno = 0; /* now handle a numeric address */
while(*rcp >= '0' && *rcp <= '9') /* collect digits */
lno = lno*10 + *rcp++ - '0'; /* compute their value */
if (rcp > cp) /* if we caught a number... */
{
static int numl = 0; /* current ind in addr-number table */
*expbuf++ = CLNUM; /* put a numeric-address marker */
*expbuf++ = numl; /* and the address table index */
linenum[numl++] = lno; /* and set the table entry */
if (numl >= MAXLINES) /* oh-oh, address table overflow */
die(TMLNR); /* abort with error message */
*expbuf++ = CEOF; /* write the end-of-address marker */
cp = rcp; /* point compile past the address */
return expbuf; /* we're done */
}
return NULL; /* no legal address was found */
}
/* accept multiline input from *cp..., discarding leading whitespace
txp: where to put the text */
static char *gettext(char* txp) /* uses global cp */
{
register char *p = cp;
SKIPWS(p); /* discard whitespace */
do {
if ((*txp = *p++) == '\\') /* handle escapes */
*txp = *p++;
if (*txp == '\0') /* we're at end of input */
return cp = --p, ++txp;
else if (*txp == '\n') /* also SKIPWS after newline */
SKIPWS(p);
} while (txp++); /* keep going till we find that nul */
return txp;
}
/* Look through the label table for an earlier entry with the same name.
   ptr: entry whose name is looked up; only entries before it are examined.
   Entries without a name are skipped.
   Returns the matching entry, or NULL if there is none.
   uses global lablst */
static label *search(label *ptr)
{
    label *cand = lablst;

    while (cand < ptr)
    {
        if (cand->name != NULL && strcmp(cand->name, ptr->name) == 0)
            return cand;
        cand++;
    }
    return NULL;
}
/* Write label links into the compiled-command space.
   For every label table entry, each command on the entry's chain of waiting
   branches (linked through u.link) gets u.link set to the labelled command.
   An entry that never received an address is reported as an undefined label
   and the program exits with status 2.
   uses global lablst, lab */
static void resolve(void)
{
    label *entry;

    for (entry = lablst; entry < lab; entry++)
    {
        sedcmd *node, *next;

        if (entry->address == NULL)     /* barf if not defined */
        {
            fprintf(stderr, ULABL, entry->name);
            exit(2);
        }
        if (entry->last == NULL)        /* nobody branches to it */
            continue;

        /* walk the chain, replacing each link by the real target */
        node = entry->last;
        do {
            next = node->u.link;
            node->u.link = entry->address;
            node = next;
        } while (node != NULL);
    }
}
/* compile a y (transliterate) command
ep: where to compile to
delim: end delimiter to look for */
static char *ycomp(char *ep, char delim)
{
char *tp, *sp;
int c;
/* scan the 'from' section for invalid chars */
for(sp = tp = cp; *tp != delim; tp++)
{
if (*tp == '\\')
tp++;
if ((*tp == '\n') || (*tp == '\0'))
return BAD;
}
tp++; /* tp now points at first char of 'to' section */
/* now rescan the 'from' section */
while((c = *sp++ & 0x7F) != delim)
{
if (c == '\\' && *sp == 'n')
{
sp++;
c = '\n';
}
if ((ep[c] = *tp++) == '\\' && *tp == 'n')
{
ep[c] = '\n';
tp++;
}
if ((ep[c] == delim) || (ep[c] == '\0'))
return BAD;
}
if (*tp != delim) /* 'to', 'from' parts have unequal lengths */
return BAD;
cp = ++tp; /* point compile ptr past translit */
for(c = 0; c < 128; c++) /* fill in self-map entries in table */
if (ep[c] == 0)
ep[c] = c;
return ep + 0x80; /* return first free location past table end */
}
/* sedcomp.c ends here */