1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168#define FOR_sed
169#include "toys.h"
170
171GLOBALS(
172 struct arg_list *f;
173 struct arg_list *e;
174
175
176 struct double_list *pattern;
177
178 char *nextline, *remember;
179 void *restart, *lastregex;
180 long nextlen, rememberlen, count;
181 int fdout, noeol;
182 unsigned xx;
183)
184
185
186
187
188
189
// One parsed sed command. Variable-length data (compiled regexes, argument
// strings, the 'w' file record) is stored in the same allocation after this
// header and referenced by byte offset from the start of the struct.
struct sedcmd {
  struct sedcmd *next;
  struct sedcmd *prev;

  // Address range: line numbers (-1 means '$'), or offsets to a regex_t.
  // Index 0 is the start of the range, index 1 the end; 0 means unused.
  long lmatch[2];
  int rmatch[2];

  // Offsets to the command's arguments (0 means none)
  int arg1;
  int arg2;
  // Offset to the 'w' record: 4 byte fd, 1 byte noeol flag, file name
  int w;

  // Set by '!' to invert the address match
  unsigned not;
  // Nonzero while inside a matched range. The parser also reuses it to mark
  // a command continued on the next line (s delimiter, or 256 for a/c/i).
  unsigned hit;
  // s/// flags: bit 0 'i', bit 1 'g', bit 2 'p', remaining bits = occurrence
  unsigned sflags;

  // Command character
  char c;
};
201
202
203static int emit(char *line, long len, int eol)
204{
205 int l, old = line[len];
206
207 if (TT.noeol && !writeall(TT.fdout, "\n", 1)) return 1;
208 TT.noeol = !eol;
209 if (eol) line[len++] = '\n';
210 if (!len) return 0;
211 l = writeall(TT.fdout, line, len);
212 if (eol) line[len-1] = old;
213 if (l != len) {
214 perror_msg("short write");
215
216 return 1;
217 }
218
219 return 0;
220}
221
222
223
// Grow the allocation at *old (currently oldlen bytes in use) and append
// newlen bytes of new, followed by a NUL terminator.
//
// A negative newlen means "insert a '\n' before the appended data"; the
// number of bytes copied from new is then -newlen.
//
// *old is updated to the (possibly moved) allocation. Returns a pointer to
// the byte just past the NUL terminator that was written.
static char *extend_string(char **old, char *new, int oldlen, int newlen)
{
  int separator = 0;
  char *buf, *tail;

  if (newlen < 0) {
    separator = 1;
    newlen = -newlen;
  }

  buf = xrealloc(*old, oldlen+newlen+separator+1);
  *old = buf;

  tail = buf+oldlen;
  if (separator) *tail++ = '\n';
  memcpy(tail, new, newlen);
  tail += newlen;
  *tail = 0;

  return tail+1;
}
237
238
239static void *get_regex(void *trump, int offset)
240{
241 if (!offset) {
242 if (!TT.lastregex) error_exit("no previous regex");
243 return TT.lastregex;
244 }
245
246 return TT.lastregex = offset+(char *)trump;
247}
248
249
250static void process_line(char **pline, long plen)
251{
252 struct append {
253 struct append *next, *prev;
254 int file;
255 char *str;
256 } *append = 0;
257 char *line = TT.nextline;
258 long len = TT.nextlen;
259 struct sedcmd *command;
260 int eol = 0, tea = 0;
261
262
263
264
265 TT.nextline = 0;
266 TT.nextlen = 0;
267 if (pline) {
268 TT.nextline = *pline;
269 TT.nextlen = plen;
270 *pline = 0;
271 }
272
273 if (!line || !len) return;
274 if (line[len-1] == '\n') line[--len] = eol++;
275 TT.count++;
276
277
278
279 command = TT.restart ? ((struct sedcmd *)TT.restart)-1 : (void *)TT.pattern;
280 TT.restart = 0;
281
282 while (command) {
283 char *str, c = command->c;
284
285
286 if (*command->lmatch || *command->rmatch) {
287 int miss = 0;
288 long lm;
289
290
291 if (command->hit) {
292 if (!(lm = command->lmatch[1])) {
293 if (!command->rmatch[1]) command->hit = 0;
294 else {
295 void *rm = get_regex(command, command->rmatch[1]);
296
297
298 if (line && !regexec0(rm, line, len, 0, 0, 0)) miss = 1;
299 }
300 } else if (lm > 0 && lm < TT.count) command->hit = 0;
301
302
303 } else {
304 if (!(lm = *command->lmatch)) {
305 void *rm = get_regex(command, *command->rmatch);
306
307 if (line && !regexec0(rm, line, len, 0, 0, 0)) command->hit++;
308 } else if (lm == TT.count || (lm == -1 && !pline)) command->hit++;
309
310 if (!command->lmatch[1] && !command->rmatch[1]) miss = 1;
311 }
312
313
314 lm = !(command->hit ^ command->not);
315
316
317 if (miss || command->lmatch[1] == TT.count) command->hit = 0;
318
319 if (lm) {
320
321 if (c == '{') {
322 int curly = 1;
323
324 while (curly) {
325 command = command->next;
326 if (command->c == '{') curly++;
327 if (command->c == '}') curly--;
328 }
329 }
330 command = command->next;
331 continue;
332 }
333 }
334
335
336 if (!line) {
337 command = command->next;
338 continue;
339 }
340
341
342
343 if (c=='a' || c=='r') {
344 struct append *a = xzalloc(sizeof(struct append));
345 if (command->arg1) a->str = command->arg1+(char *)command;
346 a->file = c=='r';
347 dlist_add_nomalloc((void *)&append, (void *)a);
348 } else if (c=='b' || c=='t' || c=='T') {
349 int t = tea;
350
351 if (c != 'b') tea = 0;
352 if (c=='b' || t^(c=='T')) {
353 if (!command->arg1) break;
354 str = command->arg1+(char *)command;
355 for (command = (void *)TT.pattern; command; command = command->next)
356 if (command->c == ':' && !strcmp(command->arg1+(char *)command, str))
357 break;
358 if (!command) error_exit("no :%s", str);
359 }
360 } else if (c=='c') {
361 str = command->arg1+(char *)command;
362 if (!command->hit) emit(str, strlen(str), 1);
363 free(line);
364 line = 0;
365 continue;
366 } else if (c=='d') {
367 free(line);
368 line = 0;
369 continue;
370 } else if (c=='D') {
371
372 str = line;
373 while ((str-line)<len) if (*(str++) == '\n') break;
374 len -= str - line;
375 memmove(line, str, len);
376
377
378
379 if (!len) {
380 free(line);
381 line = 0;
382 } else {
383 line[len] = 0;
384 command = (void *)TT.pattern;
385 }
386 continue;
387 } else if (c=='g') {
388 free(line);
389 line = xstrdup(TT.remember);
390 len = TT.rememberlen;
391 } else if (c=='G') {
392 line = xrealloc(line, len+TT.rememberlen+2);
393 line[len++] = '\n';
394 memcpy(line+len, TT.remember, TT.rememberlen);
395 line[len += TT.rememberlen] = 0;
396 } else if (c=='h') {
397 free(TT.remember);
398 TT.remember = xstrdup(line);
399 TT.rememberlen = len;
400 } else if (c=='H') {
401 TT.remember = xrealloc(TT.remember, TT.rememberlen+len+2);
402 TT.remember[TT.rememberlen++] = '\n';
403 memcpy(TT.remember+TT.rememberlen, line, len);
404 TT.remember[TT.rememberlen += len] = 0;
405 } else if (c=='i') {
406 str = command->arg1+(char *)command;
407 emit(str, strlen(str), 1);
408 } else if (c=='l') {
409 int i, x, off;
410
411 if (!TT.xx) {
412 terminal_size(&TT.xx, 0);
413 if (!TT.xx) TT.xx = 80;
414 if (TT.xx > sizeof(toybuf)-10) TT.xx = sizeof(toybuf)-10;
415 if (TT.xx > 4) TT.xx -= 4;
416 }
417
418 for (i = off = 0; i<len; i++) {
419 if (off >= TT.xx) {
420 toybuf[off++] = '\\';
421 emit(toybuf, off, 1);
422 off = 0;
423 }
424 x = stridx("\\\a\b\f\r\t\v", line[i]);
425 if (x != -1) {
426 toybuf[off++] = '\\';
427 toybuf[off++] = "\\abfrtv"[x];
428 } else if (line[i] >= ' ') toybuf[off++] = line[i];
429 else off += sprintf(toybuf+off, "\\%03o", line[i]);
430 }
431 toybuf[off++] = '$';
432 emit(toybuf, off, 1);
433 } else if (c=='n') {
434 TT.restart = command->next+1;
435
436 break;
437 } else if (c=='N') {
438
439
440 if (pline) {
441 TT.restart = command->next+1;
442 extend_string(&line, TT.nextline, len, -TT.nextlen);
443 free(TT.nextline);
444 TT.nextline = line;
445 TT.nextlen += len + 1;
446 line = 0;
447 }
448
449
450 goto done;
451 } else if (c=='p' || c=='P') {
452 char *l = (c=='P') ? strchr(line, '\n') : 0;
453
454 if (emit(line, l ? l-line : len, eol)) break;
455 } else if (c=='q') {
456 if (pline) *pline = (void *)1;
457 free(TT.nextline);
458 TT.nextline = 0;
459 TT.nextlen = 0;
460
461 break;
462 } else if (c=='s') {
463 char *rline = line, *new = command->arg2 + (char *)command, *swap, *rswap;
464 regmatch_t *match = (void *)toybuf;
465 regex_t *reg = get_regex(command, command->arg1);
466 int mflags = 0, count = 0, zmatch = 1, rlen = len, mlen, off, newlen;
467
468
469 while (!regexec0(reg, rline, rlen, 10, match, mflags)) {
470 mflags = REG_NOTBOL;
471
472
473 mlen = match[0].rm_eo-match[0].rm_so;
474 if (!mlen && !zmatch) {
475 if (!rlen--) break;
476 rline++;
477 zmatch++;
478 continue;
479 } else zmatch = 0;
480
481
482 off = command->sflags>>3;
483 if (off && off != ++count) {
484 rline += match[0].rm_eo;
485 rlen -= match[0].rm_eo;
486
487 continue;
488 }
489
490
491 if (match[0].rm_eo > INT_MAX) perror_exit(0);
492
493
494 for (off = newlen = 0; new[off]; off++) {
495 int cc = -1;
496
497 if (new[off] == '&') cc = 0;
498 else if (new[off] == '\\') cc = new[++off] - '0';
499 if (cc < 0 || cc > 9) {
500 newlen++;
501 continue;
502 }
503 newlen += match[cc].rm_eo-match[cc].rm_so;
504 }
505
506
507
508 len += newlen-mlen;
509 swap = xmalloc(len+1);
510 rswap = swap+(rline-line)+match[0].rm_so;
511 memcpy(swap, line, (rline-line)+match[0].rm_so);
512 memcpy(rswap+newlen, rline+match[0].rm_eo, (rlen -= match[0].rm_eo)+1);
513
514
515 for (off = mlen = 0; new[off]; off++) {
516 int cc = 0, ll;
517
518 if (new[off] == '\\') {
519 cc = new[++off] - '0';
520 if (cc<0 || cc>9) {
521 if (!(rswap[mlen++] = unescape(new[off])))
522 rswap[mlen-1] = new[off];
523
524 continue;
525 } else if (match[cc].rm_so == -1) error_exit("no s//\\%d/", cc);
526 } else if (new[off] != '&') {
527 rswap[mlen++] = new[off];
528
529 continue;
530 }
531
532 ll = match[cc].rm_eo-match[cc].rm_so;
533 memcpy(rswap+mlen, rline+match[cc].rm_so, ll);
534 mlen += ll;
535 }
536
537 rline = rswap+newlen;
538 free(line);
539 line = swap;
540
541
542 if (!(command->sflags & 2)) break;
543 }
544
545 if (mflags) {
546
547 if (command->sflags & 4) emit(line, len, eol);
548
549 tea = 1;
550 if (command->w) goto writenow;
551 }
552 } else if (c=='w') {
553 int fd, noeol;
554 char *name;
555
556writenow:
557
558 fd = TT.fdout;
559 noeol = TT.noeol;
560
561
562 name = command->w + (char *)command;
563 memcpy(&TT.fdout, name, 4);
564 name += 4;
565 TT.noeol = *(name++);
566
567
568 if (emit(line, len, eol))
569 perror_exit("w '%s'", command->arg1+(char *)command);
570 *(--name) = TT.noeol;
571 TT.noeol = noeol;
572 TT.fdout = fd;
573 } else if (c=='x') {
574 long swap = TT.rememberlen;
575
576 str = TT.remember;
577 TT.remember = line;
578 line = str;
579 TT.rememberlen = len;
580 len = swap;
581 } else if (c=='y') {
582 char *from, *to = (char *)command;
583 int i, j;
584
585 from = to+command->arg1;
586 to += command->arg2;
587
588 for (i = 0; i < len; i++) {
589 j = stridx(from, line[i]);
590 if (j != -1) line[i] = to[j];
591 }
592 } else if (c=='=') {
593 sprintf(toybuf, "%ld", TT.count);
594 emit(toybuf, strlen(toybuf), 1);
595 }
596
597 command = command->next;
598 }
599
600 if (line && !(toys.optflags & FLAG_n)) emit(line, len, eol);
601
602done:
603 if (dlist_terminate(append)) while (append) {
604 struct append *a = append->next;
605
606 if (append->file) {
607 int fd = open(append->str, O_RDONLY);
608
609
610 if (fd != -1) {
611 if (TT.noeol) xwrite(TT.fdout, "\n", 1);
612 TT.noeol = 0;
613 xsendfile(fd, TT.fdout);
614 close(fd);
615 }
616 } else if (append->str) emit(append->str, strlen(append->str), 1);
617 else emit(line, 0, 0);
618 free(append);
619 append = a;
620 }
621 free(line);
622}
623
624
625static void do_sed(int fd, char *name)
626{
627 int i = toys.optflags & FLAG_i;
628 char *tmp;
629
630 if (i) {
631 struct sedcmd *command;
632
633 if (!fd) {
634 error_msg("-i on stdin");
635 return;
636 }
637 TT.fdout = copy_tempfile(fd, name, &tmp);
638 TT.count = 0;
639 for (command = (void *)TT.pattern; command; command = command->next)
640 command->hit = 0;
641 }
642 do_lines(fd, process_line);
643 if (i) {
644 process_line(0, 0);
645 replace_tempfile(-1, TT.fdout, &tmp);
646 TT.fdout = 1;
647 TT.nextline = 0;
648 TT.nextlen = TT.noeol = 0;
649 }
650}
651
652
653
654
655
// Copy one delimited chunk of a sed script (regex, y string) out of *pstr,
// processing backslash escapes, and advance *pstr past the closing delimiter.
//
// pstr:  in/out cursor into the script text.
// delim: if NULL or pointing at 0, the delimiter is read from the start of
//        the string (an optional leading backslash is skipped) and, when
//        delim is non-NULL, stored through it. Otherwise *delim is the
//        delimiter and the chunk starts at *pstr.
//
// A delimiter inside a [bracket expression] does not end the chunk.
//
// Returns a newly allocated string the caller must free(), or 0 if the
// delimiter is missing/invalid or the chunk is unterminated.
static char *unescape_delimited_string(char **pstr, char *delim)
{
  char *to, *from, *out, mode = 0, d;

  // Grab leading delimiter (if necessary), allocate space for new string
  from = *pstr;
  if (!delim || !*delim) {
    if (!(d = *(from++))) return 0;
    if (d == '\\') d = *(from++);
    if (!d || d == '\\') return 0;
    if (delim) *delim = d;
  } else d = *delim;
  // Output never grows beyond the input, so the input length is enough
  to = out = xmalloc(strlen(*pstr)+1);

  // mode is 0 outside brackets, ']' inside [ ], and '.', '=' or ':' inside
  // a nested [. .], [= =] or [: :] element.
  while (mode || *from != d) {
    if (!*from) goto fail;

    // Track bracket expression state
    if (*from == '[') {
      if (!mode) {
        mode = ']';
        // A leading '-' or ']' is literal
        if (from[1]=='-' || from[1]==']') *(to++) = *(from++);
      // Check from[1] first: strchr() would otherwise match the string's
      // terminating NUL and send the parser past the end of the input.
      } else if (mode == ']' && from[1] && strchr(".=:", from[1])) {
        *(to++) = *(from++);
        mode = *from;
      }
    } else if (*from == mode) {
      if (mode == ']') mode = 0;
      else {
        *(to++) = *(from++);
        mode = ']';
      }
    // Inside brackets, drop a "-X" that follows the same character X,
    // collapsing a one character range such as "a-a" to "a".
    } else if (mode && *from == '-' && from[-1] == from[1]) {
      from+=2;
      continue;
    } else if (*from == '\\') {
      if (!from[1]) goto fail;

      // An escaped delimiter is the delimiter itself; an escaped backslash
      // is kept as-is; other known escapes are converted.
      if (from[1] == d) from++;
      else if (from[1]=='\\') *(to++) = *(from++);
      else {
        char c = unescape(from[1]);

        if (c) {
          *(to++) = c;
          from+=2;
          continue;
        } else if (!mode) *(to++) = *(from++);
      }
    }
    *(to++) = *(from++);
  }
  *to = 0;
  *pstr = from+1;

  return out;

fail:
  // Don't leak the partial copy on malformed input
  free(out);

  return 0;
}
716
717
718
719static void parse_pattern(char **pline, long len)
720{
721 struct sedcmd *command = (void *)TT.pattern;
722 char *line, *reg, c, *errstart;
723 int i;
724
725 line = errstart = pline ? *pline : "";
726 if (len && line[len-1]=='\n') line[--len] = 0;
727
728
729
730
731
732 if (command && command->prev->hit) {
733
734 TT.pattern = TT.pattern->prev;
735 command = dlist_pop(&TT.pattern);
736 c = command->c;
737 reg = (char *)command;
738 reg += command->arg1 + strlen(reg + command->arg1);
739
740
741
742
743 if (command->hit < 256) goto resume_s;
744 else goto resume_a;
745 }
746
747
748
749 command = 0;
750 for (;;) {
751 if (command) dlist_add_nomalloc(&TT.pattern, (void *)command);
752
753
754 for (;;) {
755 while (isspace(*line) || *line == ';') line++;
756 if (*line == '#') while (*line && *line != '\n') line++;
757 else break;
758 }
759 if (!*line) return;
760
761
762
763
764 errstart = line;
765 memset(toybuf, 0, sizeof(struct sedcmd));
766 command = (void *)toybuf;
767 reg = toybuf + sizeof(struct sedcmd);
768
769
770 for (i = 0; i < 2; i++) {
771 if (*line == ',') line++;
772 else if (i) break;
773
774 if (isdigit(*line)) command->lmatch[i] = strtol(line, &line, 0);
775 else if (*line == '$') {
776 command->lmatch[i] = -1;
777 line++;
778 } else if (*line == '/' || *line == '\\') {
779 char *s = line;
780
781 if (!(s = unescape_delimited_string(&line, 0))) goto error;
782 if (!*s) command->rmatch[i] = 0;
783 else {
784 xregcomp((void *)reg, s, (toys.optflags & FLAG_r)*REG_EXTENDED);
785 command->rmatch[i] = reg-toybuf;
786 reg += sizeof(regex_t);
787 }
788 free(s);
789 } else break;
790 }
791
792 while (isspace(*line)) line++;
793 if (!*line) break;
794
795 while (*line == '!') {
796 command->not = 1;
797 line++;
798 }
799 while (isspace(*line)) line++;
800
801 c = command->c = *(line++);
802 if (strchr("}:", c) && i) break;
803 if (strchr("aiqr=", c) && i>1) break;
804
805
806 command = xmemdup(toybuf, reg-toybuf);
807 reg = (reg-toybuf) + (char *)command;
808
809
810 if (c == '{') TT.nextlen++;
811 else if (c == '}') {
812 if (!TT.nextlen--) break;
813 } else if (c == 's') {
814 char *end, delim = 0;
815
816
817
818
819
820
821
822
823 command->arg2 = reg - (char *)command;
824 if (!(TT.remember = unescape_delimited_string(&line, &delim)))
825 goto error;
826
827 reg += sizeof(regex_t);
828 command->arg1 = reg-(char *)command;
829 command->hit = delim;
830resume_s:
831
832
833 end = line;
834 while (*end != command->hit) {
835 if (!*end) goto error;
836 if (*end++ == '\\') {
837 if (!*end || *end == '\n') {
838 end[-1] = '\n';
839 break;
840 }
841 end++;
842 }
843 }
844
845 reg = extend_string((void *)&command, line, reg-(char *)command,end-line);
846 line = end;
847
848 if (*line == command->hit) command->hit = 0;
849 else {
850 if (!*line) continue;
851 reg--;
852 line++;
853 goto resume_s;
854 }
855
856
857 i = command->arg1;
858 command->arg1 = command->arg2;
859 command->arg2 = i;
860
861
862 for (line++; *line; line++) {
863 long l;
864
865 if (isspace(*line) && *line != '\n') continue;
866
867 if (0 <= (l = stridx("igp", *line))) command->sflags |= 1<<l;
868 else if (!(command->sflags>>3) && 0<(l = strtol(line, &line, 10))) {
869 command->sflags |= l << 3;
870 line--;
871 } else break;
872 }
873
874
875
876 if (!*TT.remember) command->arg1 = 0;
877 else xregcomp((void *)(command->arg1 + (char *)command), TT.remember,
878 ((toys.optflags & FLAG_r)*REG_EXTENDED)|((command->sflags&1)*REG_ICASE));
879 free(TT.remember);
880 TT.remember = 0;
881 if (*line == 'w') {
882 line++;
883 goto writenow;
884 }
885 } else if (c == 'w') {
886 int fd, delim;
887 char *cc;
888
889
890
891
892
893
894writenow:
895 while (isspace(*line)) line++;
896 if (!*line) goto error;
897 for (cc = line; *cc; cc++) if (*cc == '\\' && cc[1] == ';') break;
898 delim = *cc;
899 *cc = 0;
900 fd = xcreate(line, O_WRONLY|O_CREAT|O_TRUNC, 0644);
901 *cc = delim;
902
903 command->w = reg - (char *)command;
904 command = xrealloc(command, command->w+(cc-line)+6);
905 reg = command->w + (char *)command;
906
907 memcpy(reg, &fd, 4);
908 reg += 4;
909 *(reg++) = 0;
910 memcpy(reg, line, delim);
911 reg += delim;
912 *(reg++) = 0;
913
914 line = cc;
915 if (delim) line += 2;
916 } else if (c == 'y') {
917 char *s, delim = 0;
918 int len;
919
920 if (!(s = unescape_delimited_string(&line, &delim))) goto error;
921 command->arg1 = reg-(char *)command;
922 len = strlen(s);
923 reg = extend_string((void *)&command, s, reg-(char *)command, len);
924 free(s);
925 command->arg2 = reg-(char *)command;
926 if (!(s = unescape_delimited_string(&line, &delim))) goto error;
927 if (len != strlen(s)) goto error;
928 reg = extend_string((void *)&command, s, reg-(char*)command, len);
929 free(s);
930 } else if (strchr("abcirtTw:", c)) {
931 int end;
932
933
934 while (isspace(*line) && *line != '\n') line++;
935
936
937
938resume_a:
939 command->hit = 0;
940
941
942 if (!(end = strcspn(line, strchr(":btT", c) ? "; \t\r\n\v\f" : "\n"))) {
943
944 if (strchr("btT", c)) continue;
945 else if (!command->arg1) break;
946 }
947
948
949
950
951 if (!command->arg1) command->arg1 = reg - (char*)command;
952 else if (*(command->arg1+(char *)command)) *(reg++) = '\n';
953 else if (!pline) {
954 command->arg1 = 0;
955 continue;
956 }
957 reg = extend_string((void *)&command, line, reg - (char *)command, end);
958
959
960 if (strchr("aci", c)) {
961 reg -= end+1;
962 for (i = end; i; i--) {
963 if ((*reg++ = *line++)=='\\') {
964
965
966
967 if (!--i) {
968 *--reg = 0;
969 if (*line) {
970 line++;
971 goto resume_a;
972 }
973 command->hit = 256;
974 break;
975 }
976 if (!(reg[-1] = unescape(*line))) reg[-1] = *line;
977 line++;
978 }
979 }
980 *reg = 0;
981 } else line += end;
982
983
984 } else if (!strchr("{dDgGhHlnNpPqx=", c)) break;
985 }
986
987error:
988 error_exit("bad pattern '%s'@%ld (%c)", errstart, line-errstart+1L, *line);
989}
990
991void sed_main(void)
992{
993 struct arg_list *al;
994 char **args = toys.optargs;
995
996
997
998
999 if (toys.optflags & FLAG_version) {
1000 xprintf("This is not GNU sed version 9.0\n");
1001 return;
1002 }
1003
1004
1005 if (toys.optflags&FLAG_help) help_exit(0);
1006
1007
1008
1009
1010 if (!TT.e && !TT.f) {
1011 if (!*toys.optargs) error_exit("no pattern");
1012 (TT.e = xzalloc(sizeof(struct arg_list)))->arg = *(args++);
1013 }
1014
1015
1016
1017
1018 for (al = TT.e; al; al = al->next) parse_pattern(&al->arg, strlen(al->arg));
1019 for (al = TT.f; al; al = al->next) do_lines(xopenro(al->arg), parse_pattern);
1020 parse_pattern(0, 0);
1021 dlist_terminate(TT.pattern);
1022 if (TT.nextlen) error_exit("no }");
1023
1024 TT.fdout = 1;
1025 TT.remember = xstrdup("");
1026
1027
1028 loopfiles_rw(args, O_RDONLY|WARN_ONLY, 0, do_sed);
1029
1030 if (!(toys.optflags & FLAG_i)) process_line(0, 0);
1031
1032
1033}
1034