1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169#define FOR_sed
170#include "toys.h"
171
// Command-wide state, reached through TT. Several fields are reused for a
// different purpose while the script is being parsed than while input lines
// are being processed; both roles are noted below.
172GLOBALS(
 // Script sources walked by sed_main(): each f entry is opened and fed to
 // parse_pattern() line by line, each e entry is parsed as script text.
 // (Presumably filled in from -f and -e by option parsing -- confirm against
 // the option string, which is not visible here.)
173 struct arg_list *f;
174 struct arg_list *e;
175
176
 // Parsed script. The code casts this to struct sedcmd * and walks it.
177 struct double_list *pattern;
178
 // nextline: lookahead input line held back by sed_line() until the following
 //   call. remember: buffer used by the g/G/h/H/x commands; parse_pattern()
 //   also parks the text of an s/// regex here until its flags are known.
179 char *nextline, *remember;
 // restart: where sed_line() resumes after an n/N command (stored as the next
 //   command pointer plus one). lastregex: most recent regex handed out by
 //   get_regex(), reused when a command names no regex of its own.
180 void *restart, *lastregex;
 // nextlen/rememberlen: byte lengths of nextline/remember. While parsing,
 //   nextlen instead counts '{' not yet closed by '}'. count: number of input
 //   lines processed so far (compared against line-number addresses).
181 long nextlen, rememberlen, count;
 // fdout: file descriptor emit() writes to. noeol: nonzero when the last
 //   emit() did not end with a newline, so one is owed before more output.
182 int fdout, noeol;
 // Output width used by the 'l' command to wrap long lines; 0 until computed.
183 unsigned xx;
184)
185
186
187
188
189
190
// One parsed sed command. The structure is allocated with extra bytes after
// it (compiled regexes, argument strings, 'w' file data); the int fields
// below locate that data as byte offsets from the start of the structure, so
// the block can be moved by realloc without fixing up pointers.
191struct sedcmd {
 // Doubly linked list links (TT.pattern is the list head).
192 struct sedcmd *next, *prev;
193
194
 // Address range, index 0 = start and 1 = end. lmatch is a line number
 // (-1 stands for "$", 0 for "none"); rmatch is the offset of a compiled
 // regex_t (0 for "none").
195 long lmatch[2];
196 int rmatch[2];
 // arg1/arg2: offsets of the command's arguments (for 's': regex and
 // replacement; for 'y': source and destination sets; otherwise a text or
 // label argument in arg1). w: offset of the 'w' output record, laid out as
 // a 4 byte file descriptor, one byte of pending-newline state, then a name.
197 int arg1, arg2, w;
 // not: set when the address is negated with '!'. hit: nonzero while inside
 // a matched address range; during parsing it instead records an unfinished
 // command (the delimiter for 's', 256 for a/c/i text).
198 unsigned not, hit;
 // 's' flags: bit 0 = i, bit 1 = g, bit 2 = p, remaining bits (>>3) = which
 // numbered occurrence to replace.
199 unsigned sflags;
 // The command character itself.
200 char c;
201};
202
203
204static int emit(char *line, long len, int eol)
205{
206 int l, old = line[len];
207
208 if (TT.noeol && !writeall(TT.fdout, "\n", 1)) return 1;
209 TT.noeol = !eol;
210 if (eol) line[len++] = '\n';
211 if (!len) return 0;
212 l = writeall(TT.fdout, line, len);
213 if (eol) line[len-1] = old;
214 if (l != len) {
215 perror_msg("short write");
216
217 return 1;
218 }
219
220 return 0;
221}
222
223
224
// Grow the allocation at *old and append newlen bytes of new at offset oldlen,
// NUL terminating the result. *old is updated to the (possibly moved) block.
//
// A negative newlen means "insert a newline before the appended data"; its
// absolute value is the number of bytes to copy.
//
// Returns a pointer to the byte just past the NUL terminator.
static char *extend_string(char **old, char *new, int oldlen, int newlen)
{
  int separator = 0;
  char *buf;

  if (newlen < 0) {
    separator = 1;
    newlen = -newlen;
  }

  buf = xrealloc(*old, oldlen+newlen+separator+1);
  *old = buf;

  if (separator) buf[oldlen++] = '\n';
  memcpy(buf+oldlen, new, newlen);
  buf += oldlen+newlen;
  *buf = 0;

  return buf+1;
}
238
239
240static void *get_regex(void *trump, int offset)
241{
242 if (!offset) {
243 if (!TT.lastregex) error_exit("no previous regex");
244 return TT.lastregex;
245 }
246
247 return TT.lastregex = offset+(char *)trump;
248}
249
250
251static void sed_line(char **pline, long plen)
252{
253 struct append {
254 struct append *next, *prev;
255 int file;
256 char *str;
257 } *append = 0;
258 char *line = TT.nextline;
259 long len = TT.nextlen;
260 struct sedcmd *command;
261 int eol = 0, tea = 0;
262
263
264 if (!pline && !(toys.optflags&FLAG_i)) return;
265
266
267
268
269 TT.nextline = 0;
270 TT.nextlen = 0;
271 if (pline) {
272 TT.nextline = *pline;
273 TT.nextlen = plen;
274 *pline = 0;
275 }
276
277 if (!line || !len) return;
278 if (line[len-1] == '\n') line[--len] = eol++;
279 TT.count++;
280
281
282
283 command = TT.restart ? ((struct sedcmd *)TT.restart)-1 : (void *)TT.pattern;
284 TT.restart = 0;
285
286 while (command) {
287 char *str, c = command->c;
288
289
290 if (*command->lmatch || *command->rmatch) {
291 int miss = 0;
292 long lm;
293
294
295 if (command->hit) {
296 if (!(lm = command->lmatch[1])) {
297 if (!command->rmatch[1]) command->hit = 0;
298 else {
299 void *rm = get_regex(command, command->rmatch[1]);
300
301
302 if (line && !regexec0(rm, line, len, 0, 0, 0)) miss = 1;
303 }
304 } else if (lm > 0 && lm < TT.count) command->hit = 0;
305
306
307 } else {
308 if (!(lm = *command->lmatch)) {
309 void *rm = get_regex(command, *command->rmatch);
310
311 if (line && !regexec0(rm, line, len, 0, 0, 0)) command->hit++;
312 } else if (lm == TT.count || (lm == -1 && !pline)) command->hit++;
313
314 if (!command->lmatch[1] && !command->rmatch[1]) miss = 1;
315 }
316
317
318 lm = !(command->hit ^ command->not);
319
320
321 if (miss || command->lmatch[1] == TT.count) command->hit = 0;
322
323 if (lm) {
324
325 if (c == '{') {
326 int curly = 1;
327
328 while (curly) {
329 command = command->next;
330 if (command->c == '{') curly++;
331 if (command->c == '}') curly--;
332 }
333 }
334 command = command->next;
335 continue;
336 }
337 }
338
339
340 if (!line) {
341 command = command->next;
342 continue;
343 }
344
345
346
347 if (c=='a' || c=='r') {
348 struct append *a = xzalloc(sizeof(struct append));
349 if (command->arg1) a->str = command->arg1+(char *)command;
350 a->file = c=='r';
351 dlist_add_nomalloc((void *)&append, (void *)a);
352 } else if (c=='b' || c=='t' || c=='T') {
353 int t = tea;
354
355 if (c != 'b') tea = 0;
356 if (c=='b' || t^(c=='T')) {
357 if (!command->arg1) break;
358 str = command->arg1+(char *)command;
359 for (command = (void *)TT.pattern; command; command = command->next)
360 if (command->c == ':' && !strcmp(command->arg1+(char *)command, str))
361 break;
362 if (!command) error_exit("no :%s", str);
363 }
364 } else if (c=='c') {
365 str = command->arg1+(char *)command;
366 if (!command->hit) emit(str, strlen(str), 1);
367 free(line);
368 line = 0;
369 continue;
370 } else if (c=='d') {
371 free(line);
372 line = 0;
373 continue;
374 } else if (c=='D') {
375
376 str = line;
377 while ((str-line)<len) if (*(str++) == '\n') break;
378 len -= str - line;
379 memmove(line, str, len);
380
381
382
383 if (!len) {
384 free(line);
385 line = 0;
386 } else {
387 line[len] = 0;
388 command = (void *)TT.pattern;
389 }
390 continue;
391 } else if (c=='g') {
392 free(line);
393 line = xstrdup(TT.remember);
394 len = TT.rememberlen;
395 } else if (c=='G') {
396 line = xrealloc(line, len+TT.rememberlen+2);
397 line[len++] = '\n';
398 memcpy(line+len, TT.remember, TT.rememberlen);
399 line[len += TT.rememberlen] = 0;
400 } else if (c=='h') {
401 free(TT.remember);
402 TT.remember = xstrdup(line);
403 TT.rememberlen = len;
404 } else if (c=='H') {
405 TT.remember = xrealloc(TT.remember, TT.rememberlen+len+2);
406 TT.remember[TT.rememberlen++] = '\n';
407 memcpy(TT.remember+TT.rememberlen, line, len);
408 TT.remember[TT.rememberlen += len] = 0;
409 } else if (c=='i') {
410 str = command->arg1+(char *)command;
411 emit(str, strlen(str), 1);
412 } else if (c=='l') {
413 int i, x, off;
414
415 if (!TT.xx) {
416 terminal_size(&TT.xx, 0);
417 if (!TT.xx) TT.xx = 80;
418 if (TT.xx > sizeof(toybuf)-10) TT.xx = sizeof(toybuf)-10;
419 if (TT.xx > 4) TT.xx -= 4;
420 }
421
422 for (i = off = 0; i<len; i++) {
423 if (off >= TT.xx) {
424 toybuf[off++] = '\\';
425 emit(toybuf, off, 1);
426 off = 0;
427 }
428 x = stridx("\\\a\b\f\r\t\v", line[i]);
429 if (x != -1) {
430 toybuf[off++] = '\\';
431 toybuf[off++] = "\\abfrtv"[x];
432 } else if (line[i] >= ' ') toybuf[off++] = line[i];
433 else off += sprintf(toybuf+off, "\\%03o", line[i]);
434 }
435 toybuf[off++] = '$';
436 emit(toybuf, off, 1);
437 } else if (c=='n') {
438 TT.restart = command->next+1;
439
440 break;
441 } else if (c=='N') {
442
443
444 if (pline) {
445 TT.restart = command->next+1;
446 extend_string(&line, TT.nextline, len, -TT.nextlen);
447 free(TT.nextline);
448 TT.nextline = line;
449 TT.nextlen += len + 1;
450 line = 0;
451 }
452
453
454 goto done;
455 } else if (c=='p' || c=='P') {
456 char *l = (c=='P') ? strchr(line, '\n') : 0;
457
458 if (emit(line, l ? l-line : len, eol)) break;
459 } else if (c=='q') {
460 if (pline) *pline = (void *)1;
461 free(TT.nextline);
462 TT.nextline = 0;
463 TT.nextlen = 0;
464
465 break;
466 } else if (c=='s') {
467 char *rline = line, *new = command->arg2 + (char *)command, *swap, *rswap;
468 regmatch_t *match = (void *)toybuf;
469 regex_t *reg = get_regex(command, command->arg1);
470 int mflags = 0, count = 0, zmatch = 1, rlen = len, mlen, off, newlen;
471
472
473 while (!regexec0(reg, rline, rlen, 10, match, mflags)) {
474 mflags = REG_NOTBOL;
475
476
477 mlen = match[0].rm_eo-match[0].rm_so;
478 if (!mlen && !zmatch) {
479 if (!rlen--) break;
480 rline++;
481 zmatch++;
482 continue;
483 } else zmatch = 0;
484
485
486 off = command->sflags>>3;
487 if (off && off != ++count) {
488 rline += match[0].rm_eo;
489 rlen -= match[0].rm_eo;
490
491 continue;
492 }
493
494
495 if (match[0].rm_eo > INT_MAX) perror_exit(0);
496
497
498 for (off = newlen = 0; new[off]; off++) {
499 int cc = -1;
500
501 if (new[off] == '&') cc = 0;
502 else if (new[off] == '\\') cc = new[++off] - '0';
503 if (cc < 0 || cc > 9) {
504 newlen++;
505 continue;
506 }
507 newlen += match[cc].rm_eo-match[cc].rm_so;
508 }
509
510
511
512 len += newlen-mlen;
513 swap = xmalloc(len+1);
514 rswap = swap+(rline-line)+match[0].rm_so;
515 memcpy(swap, line, (rline-line)+match[0].rm_so);
516 memcpy(rswap+newlen, rline+match[0].rm_eo, (rlen -= match[0].rm_eo)+1);
517
518
519 for (off = mlen = 0; new[off]; off++) {
520 int cc = 0, ll;
521
522 if (new[off] == '\\') {
523 cc = new[++off] - '0';
524 if (cc<0 || cc>9) {
525 if (!(rswap[mlen++] = unescape(new[off])))
526 rswap[mlen-1] = new[off];
527
528 continue;
529 } else if (match[cc].rm_so == -1) error_exit("no s//\\%d/", cc);
530 } else if (new[off] != '&') {
531 rswap[mlen++] = new[off];
532
533 continue;
534 }
535
536 ll = match[cc].rm_eo-match[cc].rm_so;
537 memcpy(rswap+mlen, rline+match[cc].rm_so, ll);
538 mlen += ll;
539 }
540
541 rline = rswap+newlen;
542 free(line);
543 line = swap;
544
545
546 if (!(command->sflags & 2)) break;
547 }
548
549 if (mflags) {
550
551 if (command->sflags & 4) emit(line, len, eol);
552
553 tea = 1;
554 if (command->w) goto writenow;
555 }
556 } else if (c=='w') {
557 int fd, noeol;
558 char *name;
559
560writenow:
561
562 fd = TT.fdout;
563 noeol = TT.noeol;
564
565
566 name = command->w + (char *)command;
567 memcpy(&TT.fdout, name, 4);
568 name += 4;
569 TT.noeol = *(name++);
570
571
572 if (emit(line, len, eol))
573 perror_exit("w '%s'", command->arg1+(char *)command);
574 *(--name) = TT.noeol;
575 TT.noeol = noeol;
576 TT.fdout = fd;
577 } else if (c=='x') {
578 long swap = TT.rememberlen;
579
580 str = TT.remember;
581 TT.remember = line;
582 line = str;
583 TT.rememberlen = len;
584 len = swap;
585 } else if (c=='y') {
586 char *from, *to = (char *)command;
587 int i, j;
588
589 from = to+command->arg1;
590 to += command->arg2;
591
592 for (i = 0; i < len; i++) {
593 j = stridx(from, line[i]);
594 if (j != -1) line[i] = to[j];
595 }
596 } else if (c=='=') {
597 sprintf(toybuf, "%ld", TT.count);
598 emit(toybuf, strlen(toybuf), 1);
599 }
600
601 command = command->next;
602 }
603
604 if (line && !(toys.optflags & FLAG_n)) emit(line, len, eol);
605
606done:
607 if (dlist_terminate(append)) while (append) {
608 struct append *a = append->next;
609
610 if (append->file) {
611 int fd = open(append->str, O_RDONLY);
612
613
614 if (fd != -1) {
615 if (TT.noeol) xwrite(TT.fdout, "\n", 1);
616 TT.noeol = 0;
617 xsendfile(fd, TT.fdout);
618 close(fd);
619 }
620 } else if (append->str) emit(append->str, strlen(append->str), 1);
621 else emit(line, 0, 0);
622 free(append);
623 append = a;
624 }
625 free(line);
626}
627
628
629static void do_sed_file(int fd, char *name)
630{
631 int i = toys.optflags & FLAG_i;
632 char *tmp;
633
634 if (i) {
635 struct sedcmd *command;
636
637 if (!fd) return error_msg("-i on stdin");
638 TT.fdout = copy_tempfile(fd, name, &tmp);
639 TT.count = 0;
640 for (command = (void *)TT.pattern; command; command = command->next)
641 command->hit = 0;
642 }
643 do_lines(fd, sed_line);
644 if (i) {
645 replace_tempfile(-1, TT.fdout, &tmp);
646 TT.fdout = 1;
647 TT.nextline = 0;
648 TT.nextlen = TT.noeol = 0;
649 }
650}
651
652
653
654
655
// Copy a delimited string out of a script, resolving escapes, and advance
// *pstr past the closing delimiter.
//
//   pstr:  in/out position in the script text.
//   delim: if null or pointing at 0, the delimiter is read from the start of
//          the string (optionally preceded by a backslash) and, when delim is
//          non-null, stored there. Otherwise *delim is the delimiter and the
//          string is taken to start immediately.
//
// An escaped delimiter becomes the bare delimiter, "\\" is kept as is,
// sequences unescape() recognises are converted, and other backslashes are
// kept outside bracket expressions. Delimiters inside [] bracket expressions
// (including [. .], [= =] and [: :] inside them) do not end the string.
//
// Returns a newly allocated string the caller must free, or 0 if the string
// is malformed or unterminated.
static char *unescape_delimited_string(char **pstr, char *delim)
{
  char *out, *to, *from, mode = 0, d;

  // Work out the delimiter.
  from = *pstr;
  if (!delim || !*delim) {
    if (!(d = *(from++))) return 0;
    if (d == '\\') d = *(from++);
    if (!d || d == '\\') return 0;
    if (delim) *delim = d;
  } else d = *delim;

  // The result is never longer than the input.
  to = out = xmalloc(strlen(*pstr)+1);

  // mode is 0 outside brackets, ']' inside [], or one of ".=:" inside a
  // nested [. [= [: class.
  while (mode || *from != d) {
    if (!*from) goto fail;

    if (*from == '[') {
      if (!mode) {
        mode = ']';
        // A leading - or ] is literal; copy it along with the bracket.
        if (from[1]=='-' || from[1]==']') *(to++) = *(from++);
      // Check from[1] explicitly: strchr() would otherwise match the set's
      // own NUL terminator at end of input and run past the string.
      } else if (mode == ']' && from[1] && strchr(".=:", from[1])) {
        *(to++) = *(from++);
        mode = *from;
      }
    } else if (*from == mode) {
      if (mode == ']') mode = 0;
      else {
        *(to++) = *(from++);
        mode = ']';
      }

    // Inside brackets, drop a "-x" that follows the same character x
    // (a range from x to x adds nothing).
    } else if (mode && *from == '-' && from[-1] == from[1]) {
      from+=2;
      continue;
    } else if (*from == '\\') {
      if (!from[1]) goto fail;

      // Escaped delimiter: keep only the delimiter. Escaped backslash: keep
      // both characters for later processing.
      if (from[1] == d) from++;
      else if (from[1]=='\\') *(to++) = *(from++);
      else {
        char c = unescape(from[1]);

        if (c) {
          *(to++) = c;
          from+=2;
          continue;
        } else if (!mode) *(to++) = *(from++);
      }
    }
    *(to++) = *(from++);
  }
  *to = 0;
  *pstr = from+1;

  return out;

fail:
  free(out);

  return 0;
}
716
717
718
719static void parse_pattern(char **pline, long len)
720{
721 struct sedcmd *command = (void *)TT.pattern;
722 char *line, *reg, c, *errstart;
723 int i;
724
725 line = errstart = pline ? *pline : "";
726 if (len && line[len-1]=='\n') line[--len] = 0;
727
728
729
730
731
732 if (command && command->prev->hit) {
733
734 TT.pattern = TT.pattern->prev;
735 command = dlist_pop(&TT.pattern);
736 c = command->c;
737 reg = (char *)command;
738 reg += command->arg1 + strlen(reg + command->arg1);
739
740
741
742
743 if (command->hit < 256) goto resume_s;
744 else goto resume_a;
745 }
746
747
748
749 command = 0;
750 for (;;) {
751 if (command) dlist_add_nomalloc(&TT.pattern, (void *)command);
752
753
754 for (;;) {
755 while (isspace(*line) || *line == ';') line++;
756 if (*line == '#') while (*line && *line != '\n') line++;
757 else break;
758 }
759 if (!*line) return;
760
761
762
763
764 errstart = line;
765 memset(toybuf, 0, sizeof(struct sedcmd));
766 command = (void *)toybuf;
767 reg = toybuf + sizeof(struct sedcmd);
768
769
770 for (i = 0; i < 2; i++) {
771 if (*line == ',') line++;
772 else if (i) break;
773
774 if (isdigit(*line)) command->lmatch[i] = strtol(line, &line, 0);
775 else if (*line == '$') {
776 command->lmatch[i] = -1;
777 line++;
778 } else if (*line == '/' || *line == '\\') {
779 char *s = line;
780
781 if (!(s = unescape_delimited_string(&line, 0))) goto error;
782 if (!*s) command->rmatch[i] = 0;
783 else {
784 xregcomp((void *)reg, s, (toys.optflags & FLAG_r)*REG_EXTENDED);
785 command->rmatch[i] = reg-toybuf;
786 reg += sizeof(regex_t);
787 }
788 free(s);
789 } else break;
790 }
791
792 while (isspace(*line)) line++;
793 if (!*line) break;
794
795 while (*line == '!') {
796 command->not = 1;
797 line++;
798 }
799 while (isspace(*line)) line++;
800
801 c = command->c = *(line++);
802 if (strchr("}:", c) && i) break;
803 if (strchr("aiqr=", c) && i>1) break;
804
805
806 command = xmemdup(toybuf, reg-toybuf);
807 reg = (reg-toybuf) + (char *)command;
808
809
810 if (c == '{') TT.nextlen++;
811 else if (c == '}') {
812 if (!TT.nextlen--) break;
813 } else if (c == 's') {
814 char *end, delim = 0;
815
816
817
818
819
820
821
822
823 command->arg2 = reg - (char *)command;
824 if (!(TT.remember = unescape_delimited_string(&line, &delim)))
825 goto error;
826
827 reg += sizeof(regex_t);
828 command->arg1 = reg-(char *)command;
829 command->hit = delim;
830resume_s:
831
832
833 end = line;
834 while (*end != command->hit) {
835 if (!*end) goto error;
836 if (*end++ == '\\') {
837 if (!*end || *end == '\n') {
838 end[-1] = '\n';
839 break;
840 }
841 end++;
842 }
843 }
844
845 reg = extend_string((void *)&command, line, reg-(char *)command,end-line);
846 line = end;
847
848 if (*line == command->hit) command->hit = 0;
849 else {
850 if (!*line) continue;
851 reg--;
852 line++;
853 goto resume_s;
854 }
855
856
857 i = command->arg1;
858 command->arg1 = command->arg2;
859 command->arg2 = i;
860
861
862 for (line++; *line; line++) {
863 long l;
864
865 if (isspace(*line) && *line != '\n') continue;
866
867 if (0 <= (l = stridx("igp", *line))) command->sflags |= 1<<l;
868 else if (!(command->sflags>>3) && 0<(l = strtol(line, &line, 10))) {
869 command->sflags |= l << 3;
870 line--;
871 } else break;
872 }
873
874
875
876 if (!*TT.remember) command->arg1 = 0;
877 else xregcomp((void *)(command->arg1 + (char *)command), TT.remember,
878 ((toys.optflags & FLAG_r)*REG_EXTENDED)|((command->sflags&1)*REG_ICASE));
879 free(TT.remember);
880 TT.remember = 0;
881 if (*line == 'w') {
882 line++;
883 goto writenow;
884 }
885 } else if (c == 'w') {
886 int fd, delim;
887 char *cc;
888
889
890
891
892
893
894writenow:
895 while (isspace(*line)) line++;
896 if (!*line) goto error;
897 for (cc = line; *cc; cc++) if (*cc == '\\' && cc[1] == ';') break;
898 delim = *cc;
899 *cc = 0;
900 fd = xcreate(line, O_WRONLY|O_CREAT|O_TRUNC, 0644);
901 *cc = delim;
902
903 command->w = reg - (char *)command;
904 command = xrealloc(command, command->w+(cc-line)+6);
905 reg = command->w + (char *)command;
906
907 memcpy(reg, &fd, 4);
908 reg += 4;
909 *(reg++) = 0;
910 memcpy(reg, line, delim);
911 reg += delim;
912 *(reg++) = 0;
913
914 line = cc;
915 if (delim) line += 2;
916 } else if (c == 'y') {
917 char *s, delim = 0;
918 int len;
919
920 if (!(s = unescape_delimited_string(&line, &delim))) goto error;
921 command->arg1 = reg-(char *)command;
922 len = strlen(s);
923 reg = extend_string((void *)&command, s, reg-(char *)command, len);
924 free(s);
925 command->arg2 = reg-(char *)command;
926 if (!(s = unescape_delimited_string(&line, &delim))) goto error;
927 if (len != strlen(s)) goto error;
928 reg = extend_string((void *)&command, s, reg-(char*)command, len);
929 free(s);
930 } else if (strchr("abcirtTw:", c)) {
931 int end;
932
933
934 while (isspace(*line) && *line != '\n') line++;
935
936
937
938resume_a:
939 command->hit = 0;
940
941
942 if (!(end = strcspn(line, strchr(":btT", c) ? "; \t\r\n\v\f" : "\n"))) {
943
944 if (strchr("btT", c)) continue;
945 else if (!command->arg1) break;
946 }
947
948
949
950
951 if (!command->arg1) command->arg1 = reg - (char*)command;
952 else if (*(command->arg1+(char *)command)) *(reg++) = '\n';
953 else if (!pline) {
954 command->arg1 = 0;
955 continue;
956 }
957 reg = extend_string((void *)&command, line, reg - (char *)command, end);
958
959
960 if (strchr("aci", c)) {
961 reg -= end+1;
962 for (i = end; i; i--) {
963 if ((*reg++ = *line++)=='\\') {
964
965
966
967 if (!--i) {
968 *--reg = 0;
969 if (*line) {
970 line++;
971 goto resume_a;
972 }
973 command->hit = 256;
974 break;
975 }
976 if (!(reg[-1] = unescape(*line))) reg[-1] = *line;
977 line++;
978 }
979 }
980 *reg = 0;
981 } else line += end;
982
983
984 } else if (!strchr("{dDgGhHlnNpPqx=", c)) break;
985 }
986
987error:
988 error_exit("bad pattern '%s'@%ld (%c)", errstart, line-errstart+1L, *line);
989}
990
991void sed_main(void)
992{
993 struct arg_list *al;
994 char **args = toys.optargs;
995
996
997
998
999 if (toys.optflags & FLAG_version) {
1000 xprintf("This is not GNU sed version 9.0\n");
1001 return;
1002 }
1003
1004
1005 if (toys.optflags&FLAG_help) help_exit(0);
1006
1007
1008
1009
1010 if (!TT.e && !TT.f) {
1011 if (!*toys.optargs) error_exit("no pattern");
1012 (TT.e = xzalloc(sizeof(struct arg_list)))->arg = *(args++);
1013 }
1014
1015
1016
1017
1018 for (al = TT.e; al; al = al->next) parse_pattern(&al->arg, strlen(al->arg));
1019 parse_pattern(0, 0);
1020 for (al = TT.f; al; al = al->next) do_lines(xopenro(al->arg), parse_pattern);
1021 dlist_terminate(TT.pattern);
1022 if (TT.nextlen) error_exit("no }");
1023
1024 TT.fdout = 1;
1025 TT.remember = xstrdup("");
1026
1027
1028 loopfiles_rw(args, O_RDONLY|WARN_ONLY, 0, do_sed_file);
1029
1030
1031 if (!(toys.optflags & FLAG_i)) {
1032 toys.optflags |= FLAG_i;
1033 sed_line(0, 0);
1034 }
1035
1036
1037}
1038