1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169#define FOR_sed
170#include "toys.h"
171
172GLOBALS(
173 struct arg_list *f, *e;
174
175
176 struct double_list *pattern;
177
178 char *nextline, *remember;
179 void *restart, *lastregex;
180 long nextlen, rememberlen, count;
181 int fdout, noeol;
182 unsigned xx;
183)
184
185
186
187
188
189
// One parsed sed command. Variable-length data (compiled regexes, argument
// strings, the 'w' file record) is stored in the same allocation directly
// after this struct and addressed by byte offsets from the struct's start,
// so the allocation can be realloc()ed without fixing up pointers.
struct sedcmd {
  struct sedcmd *next, *prev;

  // Address range: [0] is the start, [1] is the end. For each slot either
  // lmatch (line number, -1 means '$' = last line) or rmatch (offset to a
  // compiled regex_t) is used; zero means "not set".
  long lmatch[2];
  int rmatch[2];
  // Offsets to the command's arguments (0 = none). For 's', arg1 is the
  // regex_t and arg2 the replacement text; for 'y' they are the from/to sets.
  // w is the offset of the write record: 4 byte fd, 1 byte noeol flag, name.
  int arg1, arg2, w;
  // not: address was followed by '!'.
  // hit: in sed_line() nonzero while inside a matched address range; in
  //   parse_pattern() it is continuation state (the s delimiter character,
  //   or 256 for a/c/i text continued on the next line).
  unsigned not, hit;
  // s flags: bit 0 = i, bit 1 = g, bit 2 = p, bits 3+ = occurrence number.
  unsigned sflags;
  // The command character itself.
  char c;
};
201
202
203static int emit(char *line, long len, int eol)
204{
205 int l, old = line[len];
206
207 if (TT.noeol && !writeall(TT.fdout, "\n", 1)) return 1;
208 TT.noeol = !eol;
209 if (eol) line[len++] = '\n';
210 if (!len) return 0;
211 l = writeall(TT.fdout, line, len);
212 if (eol) line[len-1] = old;
213 if (l != len) {
214 perror_msg("short write");
215
216 return 1;
217 }
218
219 return 0;
220}
221
222
223
// Grow the allocation at *old (currently holding oldlen bytes of payload) and
// append a copy of new to it, NUL terminated.
//
// newlen is the number of bytes to copy from new. A negative newlen means
// "copy -newlen bytes, and insert a '\n' between the old data and the new".
// *old is updated because the block may move.
//
// Returns a pointer just past the NUL terminator that was written.
static char *extend_string(char **old, char *new, int oldlen, int newlen)
{
  int newline = newlen < 0;
  char *s;

  if (newline) newlen = -newlen;
  // Room for old data, optional separator, new data and the terminator.
  s = *old = xrealloc(*old, oldlen+newlen+newline+1);
  if (newline) s[oldlen++] = '\n';
  memcpy(s+oldlen, new, newlen);
  s[oldlen+newlen] = 0;

  return s+oldlen+newlen+1;
}
237
238
239static void *get_regex(void *trump, int offset)
240{
241 if (!offset) {
242 if (!TT.lastregex) error_exit("no previous regex");
243 return TT.lastregex;
244 }
245
246 return TT.lastregex = offset+(char *)trump;
247}
248
249
250static void sed_line(char **pline, long plen)
251{
252 struct append {
253 struct append *next, *prev;
254 int file;
255 char *str;
256 } *append = 0;
257 char *line = TT.nextline;
258 long len = TT.nextlen;
259 struct sedcmd *command;
260 int eol = 0, tea = 0;
261
262
263 if (!pline && !(toys.optflags&FLAG_i)) return;
264
265
266
267
268 TT.nextline = 0;
269 TT.nextlen = 0;
270 if (pline) {
271 TT.nextline = *pline;
272 TT.nextlen = plen;
273 *pline = 0;
274 }
275
276 if (!line || !len) return;
277 if (line[len-1] == '\n') line[--len] = eol++;
278 TT.count++;
279
280
281
282 command = TT.restart ? ((struct sedcmd *)TT.restart)-1 : (void *)TT.pattern;
283 TT.restart = 0;
284
285 while (command) {
286 char *str, c = command->c;
287
288
289 if (*command->lmatch || *command->rmatch) {
290 int miss = 0;
291 long lm;
292
293
294 if (command->hit) {
295 if (!(lm = command->lmatch[1])) {
296 if (!command->rmatch[1]) command->hit = 0;
297 else {
298 void *rm = get_regex(command, command->rmatch[1]);
299
300
301 if (line && !regexec0(rm, line, len, 0, 0, 0)) miss = 1;
302 }
303 } else if (lm > 0 && lm < TT.count) command->hit = 0;
304
305
306 } else {
307 if (!(lm = *command->lmatch)) {
308 void *rm = get_regex(command, *command->rmatch);
309
310 if (line && !regexec0(rm, line, len, 0, 0, 0)) command->hit++;
311 } else if (lm == TT.count || (lm == -1 && !pline)) command->hit++;
312
313 if (!command->lmatch[1] && !command->rmatch[1]) miss = 1;
314 }
315
316
317 lm = !(command->hit ^ command->not);
318
319
320 if (miss || command->lmatch[1] == TT.count) command->hit = 0;
321
322 if (lm) {
323
324 if (c == '{') {
325 int curly = 1;
326
327 while (curly) {
328 command = command->next;
329 if (command->c == '{') curly++;
330 if (command->c == '}') curly--;
331 }
332 }
333 command = command->next;
334 continue;
335 }
336 }
337
338
339 if (!line) {
340 command = command->next;
341 continue;
342 }
343
344
345
346 if (c=='a' || c=='r') {
347 struct append *a = xzalloc(sizeof(struct append));
348 if (command->arg1) a->str = command->arg1+(char *)command;
349 a->file = c=='r';
350 dlist_add_nomalloc((void *)&append, (void *)a);
351 } else if (c=='b' || c=='t' || c=='T') {
352 int t = tea;
353
354 if (c != 'b') tea = 0;
355 if (c=='b' || t^(c=='T')) {
356 if (!command->arg1) break;
357 str = command->arg1+(char *)command;
358 for (command = (void *)TT.pattern; command; command = command->next)
359 if (command->c == ':' && !strcmp(command->arg1+(char *)command, str))
360 break;
361 if (!command) error_exit("no :%s", str);
362 }
363 } else if (c=='c') {
364 str = command->arg1+(char *)command;
365 if (!command->hit) emit(str, strlen(str), 1);
366 free(line);
367 line = 0;
368 continue;
369 } else if (c=='d') {
370 free(line);
371 line = 0;
372 continue;
373 } else if (c=='D') {
374
375 str = line;
376 while ((str-line)<len) if (*(str++) == '\n') break;
377 len -= str - line;
378 memmove(line, str, len);
379
380
381
382 if (!len) {
383 free(line);
384 line = 0;
385 } else {
386 line[len] = 0;
387 command = (void *)TT.pattern;
388 }
389 continue;
390 } else if (c=='g') {
391 free(line);
392 line = xstrdup(TT.remember);
393 len = TT.rememberlen;
394 } else if (c=='G') {
395 line = xrealloc(line, len+TT.rememberlen+2);
396 line[len++] = '\n';
397 memcpy(line+len, TT.remember, TT.rememberlen);
398 line[len += TT.rememberlen] = 0;
399 } else if (c=='h') {
400 free(TT.remember);
401 TT.remember = xstrdup(line);
402 TT.rememberlen = len;
403 } else if (c=='H') {
404 TT.remember = xrealloc(TT.remember, TT.rememberlen+len+2);
405 TT.remember[TT.rememberlen++] = '\n';
406 memcpy(TT.remember+TT.rememberlen, line, len);
407 TT.remember[TT.rememberlen += len] = 0;
408 } else if (c=='i') {
409 str = command->arg1+(char *)command;
410 emit(str, strlen(str), 1);
411 } else if (c=='l') {
412 int i, x, off;
413
414 if (!TT.xx) {
415 terminal_size(&TT.xx, 0);
416 if (!TT.xx) TT.xx = 80;
417 if (TT.xx > sizeof(toybuf)-10) TT.xx = sizeof(toybuf)-10;
418 if (TT.xx > 4) TT.xx -= 4;
419 }
420
421 for (i = off = 0; i<len; i++) {
422 if (off >= TT.xx) {
423 toybuf[off++] = '\\';
424 emit(toybuf, off, 1);
425 off = 0;
426 }
427 x = stridx("\\\a\b\f\r\t\v", line[i]);
428 if (x != -1) {
429 toybuf[off++] = '\\';
430 toybuf[off++] = "\\abfrtv"[x];
431 } else if (line[i] >= ' ') toybuf[off++] = line[i];
432 else off += sprintf(toybuf+off, "\\%03o", line[i]);
433 }
434 toybuf[off++] = '$';
435 emit(toybuf, off, 1);
436 } else if (c=='n') {
437 TT.restart = command->next+1;
438
439 break;
440 } else if (c=='N') {
441
442
443 if (pline) {
444 TT.restart = command->next+1;
445 extend_string(&line, TT.nextline, len, -TT.nextlen);
446 free(TT.nextline);
447 TT.nextline = line;
448 TT.nextlen += len + 1;
449 line = 0;
450 }
451
452
453 goto done;
454 } else if (c=='p' || c=='P') {
455 char *l = (c=='P') ? strchr(line, '\n') : 0;
456
457 if (emit(line, l ? l-line : len, eol)) break;
458 } else if (c=='q') {
459 if (pline) *pline = (void *)1;
460 free(TT.nextline);
461 TT.nextline = 0;
462 TT.nextlen = 0;
463
464 break;
465 } else if (c=='s') {
466 char *rline = line, *new = command->arg2 + (char *)command, *swap, *rswap;
467 regmatch_t *match = (void *)toybuf;
468 regex_t *reg = get_regex(command, command->arg1);
469 int mflags = 0, count = 0, zmatch = 1, rlen = len, mlen, off, newlen;
470
471
472 while (!regexec0(reg, rline, rlen, 10, match, mflags)) {
473 mflags = REG_NOTBOL;
474
475
476 mlen = match[0].rm_eo-match[0].rm_so;
477 if (!mlen && !zmatch) {
478 if (!rlen--) break;
479 rline++;
480 zmatch++;
481 continue;
482 } else zmatch = 0;
483
484
485 off = command->sflags>>3;
486 if (off && off != ++count) {
487 rline += match[0].rm_eo;
488 rlen -= match[0].rm_eo;
489
490 continue;
491 }
492
493
494 if (match[0].rm_eo > INT_MAX) perror_exit(0);
495
496
497 for (off = newlen = 0; new[off]; off++) {
498 int cc = -1;
499
500 if (new[off] == '&') cc = 0;
501 else if (new[off] == '\\') cc = new[++off] - '0';
502 if (cc < 0 || cc > 9) {
503 newlen++;
504 continue;
505 }
506 newlen += match[cc].rm_eo-match[cc].rm_so;
507 }
508
509
510
511 len += newlen-mlen;
512 swap = xmalloc(len+1);
513 rswap = swap+(rline-line)+match[0].rm_so;
514 memcpy(swap, line, (rline-line)+match[0].rm_so);
515 memcpy(rswap+newlen, rline+match[0].rm_eo, (rlen -= match[0].rm_eo)+1);
516
517
518 for (off = mlen = 0; new[off]; off++) {
519 int cc = 0, ll;
520
521 if (new[off] == '\\') {
522 cc = new[++off] - '0';
523 if (cc<0 || cc>9) {
524 if (!(rswap[mlen++] = unescape(new[off])))
525 rswap[mlen-1] = new[off];
526
527 continue;
528 } else if (match[cc].rm_so == -1) error_exit("no s//\\%d/", cc);
529 } else if (new[off] != '&') {
530 rswap[mlen++] = new[off];
531
532 continue;
533 }
534
535 ll = match[cc].rm_eo-match[cc].rm_so;
536 memcpy(rswap+mlen, rline+match[cc].rm_so, ll);
537 mlen += ll;
538 }
539
540 rline = rswap+newlen;
541 free(line);
542 line = swap;
543
544
545 if (!(command->sflags & 2)) break;
546 }
547
548 if (mflags) {
549
550 if (command->sflags & 4) emit(line, len, eol);
551
552 tea = 1;
553 if (command->w) goto writenow;
554 }
555 } else if (c=='w') {
556 int fd, noeol;
557 char *name;
558
559writenow:
560
561 fd = TT.fdout;
562 noeol = TT.noeol;
563
564
565 name = command->w + (char *)command;
566 memcpy(&TT.fdout, name, 4);
567 name += 4;
568 TT.noeol = *(name++);
569
570
571 if (emit(line, len, eol))
572 perror_exit("w '%s'", command->arg1+(char *)command);
573 *(--name) = TT.noeol;
574 TT.noeol = noeol;
575 TT.fdout = fd;
576 } else if (c=='x') {
577 long swap = TT.rememberlen;
578
579 str = TT.remember;
580 TT.remember = line;
581 line = str;
582 TT.rememberlen = len;
583 len = swap;
584 } else if (c=='y') {
585 char *from, *to = (char *)command;
586 int i, j;
587
588 from = to+command->arg1;
589 to += command->arg2;
590
591 for (i = 0; i < len; i++) {
592 j = stridx(from, line[i]);
593 if (j != -1) line[i] = to[j];
594 }
595 } else if (c=='=') {
596 sprintf(toybuf, "%ld", TT.count);
597 emit(toybuf, strlen(toybuf), 1);
598 }
599
600 command = command->next;
601 }
602
603 if (line && !(toys.optflags & FLAG_n)) emit(line, len, eol);
604
605done:
606 if (dlist_terminate(append)) while (append) {
607 struct append *a = append->next;
608
609 if (append->file) {
610 int fd = open(append->str, O_RDONLY);
611
612
613 if (fd != -1) {
614 if (TT.noeol) xwrite(TT.fdout, "\n", 1);
615 TT.noeol = 0;
616 xsendfile(fd, TT.fdout);
617 close(fd);
618 }
619 } else if (append->str) emit(append->str, strlen(append->str), 1);
620 else emit(line, 0, 0);
621 free(append);
622 append = a;
623 }
624 free(line);
625}
626
627
628static void do_sed_file(int fd, char *name)
629{
630 int i = toys.optflags & FLAG_i;
631 char *tmp;
632
633 if (i) {
634 struct sedcmd *command;
635
636 if (!fd) return error_msg("-i on stdin");
637 TT.fdout = copy_tempfile(fd, name, &tmp);
638 TT.count = 0;
639 for (command = (void *)TT.pattern; command; command = command->next)
640 command->hit = 0;
641 }
642 do_lines(fd, sed_line);
643 if (i) {
644 replace_tempfile(-1, TT.fdout, &tmp);
645 TT.fdout = 1;
646 TT.nextline = 0;
647 TT.nextlen = TT.noeol = 0;
648 }
649}
650
651
652
653
654
// Copy a delimited chunk of a regex out of *pstr, removing the escaping that
// was only there to protect the delimiter and converting printf style
// escapes via unescape().
//
// pstr: in, start of the text; out, first character after the closing
//   delimiter.
// delim: NULL or pointer to 0 means "the delimiter is the first character of
//   the input" (optionally preceded by a backslash); when non-NULL the
//   detected delimiter is stored back through it. A pointer to a nonzero
//   char supplies the delimiter and the input starts directly with content.
//
// Inside a bracket expression the delimiter does not terminate the string.
//
// Returns a newly allocated string the caller must free, or NULL on
// malformed input (no delimiter, unterminated string, trailing backslash).
static char *unescape_delimited_string(char **pstr, char *delim)
{
  char *to, *from, mode = 0, d;

  // Work out the delimiter.
  from = *pstr;
  if (!delim || !*delim) {
    if (!(d = *(from++))) return 0;
    if (d == '\\') d = *(from++);
    if (!d || d == '\\') return 0;
    if (delim) *delim = d;
  } else d = *delim;
  // The output is never longer than the input. From here on "delim" is
  // reused as the start of the output buffer.
  to = delim = xmalloc(strlen(*pstr)+1);

  // mode is 0 outside brackets, ']' inside [...], or one of . = : inside a
  // [. .] [= =] [: :] element within brackets.
  while (mode || *from != d) {
    if (!*from) goto fail;

    if (*from == '[') {
      if (!mode) {
        mode = ']';
        // A leading '-' or ']' is a literal member of the set.
        if (from[1]=='-' || from[1]==']') *(to++) = *(from++);
      // strchr() also matches the terminating NUL, so test for it first or
      // an input ending in "[[" walks off the end of the string.
      } else if (mode == ']' && from[1] && strchr(".=:", from[1])) {
        *(to++) = *(from++);
        mode = *from;
      }
    } else if (*from == mode) {
      if (mode == ']') mode = 0;
      else {
        // Closing ".]" "=]" or ":]": copy both characters.
        *(to++) = *(from++);
        mode = ']';
      }

    // Drop a one character range such as "a-a" inside brackets, leaving the
    // single character already copied.
    } else if (mode && *from == '-' && from[-1] == from[1]) {
      from+=2;
      continue;
    } else if (*from == '\\') {
      if (!from[1]) goto fail;

      // An escaped delimiter becomes a plain delimiter; "\\" is kept as is;
      // anything unescape() recognizes is converted. Other escapes are kept
      // verbatim outside brackets.
      if (from[1] == d) from++;
      else if (from[1]=='\\') *(to++) = *(from++);
      else {
        char c = unescape(from[1]);

        if (c) {
          *(to++) = c;
          from+=2;
          continue;
        } else if (!mode) *(to++) = *(from++);
      }
    }
    *(to++) = *(from++);
  }
  *to = 0;
  *pstr = from+1;

  return delim;

fail:
  free(delim);

  return 0;
}
715
716
717
718static void parse_pattern(char **pline, long len)
719{
720 struct sedcmd *command = (void *)TT.pattern;
721 char *line, *reg, c, *errstart;
722 int i;
723
724 line = errstart = pline ? *pline : "";
725 if (len && line[len-1]=='\n') line[--len] = 0;
726
727
728
729
730
731 if (command && command->prev->hit) {
732
733 TT.pattern = TT.pattern->prev;
734 command = dlist_pop(&TT.pattern);
735 c = command->c;
736 reg = (char *)command;
737 reg += command->arg1 + strlen(reg + command->arg1);
738
739
740
741
742 if (command->hit < 256) goto resume_s;
743 else goto resume_a;
744 }
745
746
747
748 command = 0;
749 for (;;) {
750 if (command) dlist_add_nomalloc(&TT.pattern, (void *)command);
751
752
753 for (;;) {
754 while (isspace(*line) || *line == ';') line++;
755 if (*line == '#') while (*line && *line != '\n') line++;
756 else break;
757 }
758 if (!*line) return;
759
760
761
762
763 errstart = line;
764 memset(toybuf, 0, sizeof(struct sedcmd));
765 command = (void *)toybuf;
766 reg = toybuf + sizeof(struct sedcmd);
767
768
769 for (i = 0; i < 2; i++) {
770 if (*line == ',') line++;
771 else if (i) break;
772
773 if (isdigit(*line)) command->lmatch[i] = strtol(line, &line, 0);
774 else if (*line == '$') {
775 command->lmatch[i] = -1;
776 line++;
777 } else if (*line == '/' || *line == '\\') {
778 char *s = line;
779
780 if (!(s = unescape_delimited_string(&line, 0))) goto error;
781 if (!*s) command->rmatch[i] = 0;
782 else {
783 xregcomp((void *)reg, s, (toys.optflags & FLAG_r)*REG_EXTENDED);
784 command->rmatch[i] = reg-toybuf;
785 reg += sizeof(regex_t);
786 }
787 free(s);
788 } else break;
789 }
790
791 while (isspace(*line)) line++;
792 if (!*line) break;
793
794 while (*line == '!') {
795 command->not = 1;
796 line++;
797 }
798 while (isspace(*line)) line++;
799
800 c = command->c = *(line++);
801 if (strchr("}:", c) && i) break;
802 if (strchr("aiqr=", c) && i>1) break;
803
804
805 command = xmemdup(toybuf, reg-toybuf);
806 reg = (reg-toybuf) + (char *)command;
807
808
809 if (c == '{') TT.nextlen++;
810 else if (c == '}') {
811 if (!TT.nextlen--) break;
812 } else if (c == 's') {
813 char *end, delim = 0;
814
815
816
817
818
819
820
821
822 command->arg2 = reg - (char *)command;
823 if (!(TT.remember = unescape_delimited_string(&line, &delim)))
824 goto error;
825
826 reg += sizeof(regex_t);
827 command->arg1 = reg-(char *)command;
828 command->hit = delim;
829resume_s:
830
831
832 end = line;
833 while (*end != command->hit) {
834 if (!*end) goto error;
835 if (*end++ == '\\') {
836 if (!*end || *end == '\n') {
837 end[-1] = '\n';
838 break;
839 }
840 end++;
841 }
842 }
843
844 reg = extend_string((void *)&command, line, reg-(char *)command,end-line);
845 line = end;
846
847 if (*line == command->hit) command->hit = 0;
848 else {
849 if (!*line) continue;
850 reg--;
851 line++;
852 goto resume_s;
853 }
854
855
856 i = command->arg1;
857 command->arg1 = command->arg2;
858 command->arg2 = i;
859
860
861 for (line++; *line; line++) {
862 long l;
863
864 if (isspace(*line) && *line != '\n') continue;
865
866 if (0 <= (l = stridx("igp", *line))) command->sflags |= 1<<l;
867 else if (!(command->sflags>>3) && 0<(l = strtol(line, &line, 10))) {
868 command->sflags |= l << 3;
869 line--;
870 } else break;
871 }
872
873
874
875 if (!*TT.remember) command->arg1 = 0;
876 else xregcomp((void *)(command->arg1 + (char *)command), TT.remember,
877 ((toys.optflags & FLAG_r)*REG_EXTENDED)|((command->sflags&1)*REG_ICASE));
878 free(TT.remember);
879 TT.remember = 0;
880 if (*line == 'w') {
881 line++;
882 goto writenow;
883 }
884 } else if (c == 'w') {
885 int fd, delim;
886 char *cc;
887
888
889
890
891
892
893writenow:
894 while (isspace(*line)) line++;
895 if (!*line) goto error;
896 for (cc = line; *cc; cc++) if (*cc == '\\' && cc[1] == ';') break;
897 delim = *cc;
898 *cc = 0;
899 fd = xcreate(line, O_WRONLY|O_CREAT|O_TRUNC, 0644);
900 *cc = delim;
901
902 command->w = reg - (char *)command;
903 command = xrealloc(command, command->w+(cc-line)+6);
904 reg = command->w + (char *)command;
905
906 memcpy(reg, &fd, 4);
907 reg += 4;
908 *(reg++) = 0;
909 memcpy(reg, line, delim);
910 reg += delim;
911 *(reg++) = 0;
912
913 line = cc;
914 if (delim) line += 2;
915 } else if (c == 'y') {
916 char *s, delim = 0;
917 int len;
918
919 if (!(s = unescape_delimited_string(&line, &delim))) goto error;
920 command->arg1 = reg-(char *)command;
921 len = strlen(s);
922 reg = extend_string((void *)&command, s, reg-(char *)command, len);
923 free(s);
924 command->arg2 = reg-(char *)command;
925 if (!(s = unescape_delimited_string(&line, &delim))) goto error;
926 if (len != strlen(s)) goto error;
927 reg = extend_string((void *)&command, s, reg-(char*)command, len);
928 free(s);
929 } else if (strchr("abcirtTw:", c)) {
930 int end;
931
932
933 while (isspace(*line) && *line != '\n') line++;
934
935
936
937resume_a:
938 command->hit = 0;
939
940
941 if (!(end = strcspn(line, strchr(":btT", c) ? "; \t\r\n\v\f" : "\n"))) {
942
943 if (strchr("btT", c)) continue;
944 else if (!command->arg1) break;
945 }
946
947
948
949
950 if (!command->arg1) command->arg1 = reg - (char*)command;
951 else if (*(command->arg1+(char *)command)) *(reg++) = '\n';
952 else if (!pline) {
953 command->arg1 = 0;
954 continue;
955 }
956 reg = extend_string((void *)&command, line, reg - (char *)command, end);
957
958
959 if (strchr("aci", c)) {
960 reg -= end+1;
961 for (i = end; i; i--) {
962 if ((*reg++ = *line++)=='\\') {
963
964
965
966 if (!--i) {
967 *--reg = 0;
968 if (*line) {
969 line++;
970 goto resume_a;
971 }
972 command->hit = 256;
973 break;
974 }
975 if (!(reg[-1] = unescape(*line))) reg[-1] = *line;
976 line++;
977 }
978 }
979 *reg = 0;
980 } else line += end;
981
982
983 } else if (!strchr("{dDgGhHlnNpPqx=", c)) break;
984 }
985
986error:
987 error_exit("bad pattern '%s'@%ld (%c)", errstart, line-errstart+1L, *line);
988}
989
990void sed_main(void)
991{
992 struct arg_list *al;
993 char **args = toys.optargs;
994
995
996
997
998 if (toys.optflags & FLAG_version) {
999 xprintf("This is not GNU sed version 9.0\n");
1000 return;
1001 }
1002
1003
1004 if (toys.optflags&FLAG_help) help_exit(0);
1005
1006
1007
1008
1009 if (!TT.e && !TT.f) {
1010 if (!*toys.optargs) error_exit("no pattern");
1011 (TT.e = xzalloc(sizeof(struct arg_list)))->arg = *(args++);
1012 }
1013
1014
1015
1016
1017 for (al = TT.e; al; al = al->next) parse_pattern(&al->arg, strlen(al->arg));
1018 parse_pattern(0, 0);
1019 for (al = TT.f; al; al = al->next) do_lines(xopenro(al->arg), parse_pattern);
1020 dlist_terminate(TT.pattern);
1021 if (TT.nextlen) error_exit("no }");
1022
1023 TT.fdout = 1;
1024 TT.remember = xstrdup("");
1025
1026
1027 loopfiles_rw(args, O_RDONLY|WARN_ONLY, 0, do_sed_file);
1028
1029
1030 if (!(toys.optflags & FLAG_i)) {
1031 toys.optflags |= FLAG_i;
1032 sed_line(0, 0);
1033 }
1034
1035
1036}
1037