1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170#define FOR_sed
171#include "toys.h"
172
// Per-command global state, accessed through TT in the functions of this file.
GLOBALS(
  char *i;                 // suffix appended to the original name when an
                           // in-place edit keeps a backup (see do_sed_file)
  struct arg_list *f, *e;  // script files (f) and script strings (e) that
                           // sed_main() hands to parse_pattern()

  // List of parsed commands; the entries are struct sedcmd.
  struct double_list *pattern;

  char *nextline, *remember;         // read-ahead input line; hold buffer used
                                     // by g/G/h/H/x (also scratch storage for
                                     // the s/// regex text while parsing)
  void *restart, *lastregex;         // command to resume at (stored +1) after
                                     // n/N; most recently used regex
  long nextlen, rememberlen, count;  // length of nextline (counts unclosed '{'
                                     // during parsing); length of remember;
                                     // current input line number
  int fdout, noeol;                  // output descriptor; nonzero when the last
                                     // output lacked a trailing newline
  unsigned xx;                       // wrap width for the 'l' command
  char delim;                        // input record delimiter given to do_lines
)
187
188
189
190
191
192
// One parsed sed command. Strings and regexes are stored in the same
// allocation after the struct and are referenced by byte offset from the
// start of the struct (offset+(char *)command), because the allocation is
// grown with realloc while parsing and may move.
struct sedcmd {
  struct sedcmd *next, *prev;

  // Address range: index 0 is the start, index 1 is the end.
  long lmatch[2];    // line numbers (-1 means '$', 0 means unused)
  int rmatch[2];     // offsets of compiled regex addresses (0 means unused)
  int arg1, arg2, w; // offsets of up to two arguments, and of the w file data
  unsigned not, hit; // '!' seen; range currently active (parse_pattern also
                     // uses hit to mark an unfinished multiline command)
  unsigned sflags;   // s/// flags: i=1, g=2, p=4, match number in bits 3+
  char c;            // command character
};
204
205
206static int emit(char *line, long len, int eol)
207{
208 int l, old = line[len];
209
210 if (TT.noeol && !writeall(TT.fdout, "\n", 1)) return 1;
211 TT.noeol = !eol;
212 if (eol) line[len++] = '\n';
213 if (!len) return 0;
214 l = writeall(TT.fdout, line, len);
215 if (eol) line[len-1] = old;
216 if (l != len) {
217 perror_msg("short write");
218
219 return 1;
220 }
221
222 return 0;
223}
224
225
226
// Grow the allocation at *old and append new to it, starting at offset
// oldlen. A negative newlen means "insert a '\n' first, then append
// -newlen bytes". The result is NUL terminated; *old is updated to the
// (possibly moved) allocation. Returns a pointer just past the terminator.
static char *extend_string(char **old, char *new, int oldlen, int newlen)
{
  int sep = 0;
  char *buf, *tail;

  if (newlen < 0) {
    sep = 1;
    newlen = -newlen;
  }

  buf = xrealloc(*old, oldlen+newlen+sep+1);
  *old = buf;

  tail = buf+oldlen;
  if (sep) *tail++ = '\n';
  memcpy(tail, new, newlen);
  tail += newlen;
  *tail = 0;

  return tail+1;
}
240
241
242static void *get_regex(void *trump, int offset)
243{
244 if (!offset) {
245 if (!TT.lastregex) error_exit("no previous regex");
246 return TT.lastregex;
247 }
248
249 return TT.lastregex = offset+(char *)trump;
250}
251
252
253static void sed_line(char **pline, long plen)
254{
255 struct append {
256 struct append *next, *prev;
257 int file;
258 char *str;
259 } *append = 0;
260 char *line = TT.nextline;
261 long len = TT.nextlen;
262 struct sedcmd *command;
263 int eol = 0, tea = 0;
264
265
266 if (!pline && !FLAG(i)) return;
267
268
269
270
271 TT.nextline = 0;
272 TT.nextlen = 0;
273 if (pline) {
274 TT.nextline = *pline;
275 TT.nextlen = plen;
276 *pline = 0;
277 }
278
279 if (!line || !len) return;
280 if (line[len-1] == '\n') line[--len] = eol++;
281 TT.count++;
282
283
284
285 command = TT.restart ? ((struct sedcmd *)TT.restart)-1 : (void *)TT.pattern;
286 TT.restart = 0;
287
288 while (command) {
289 char *str, c = command->c;
290
291
292 if (*command->lmatch || *command->rmatch) {
293 int miss = 0;
294 long lm;
295
296
297 if (command->hit) {
298 if (!(lm = command->lmatch[1])) {
299 if (!command->rmatch[1]) command->hit = 0;
300 else {
301 void *rm = get_regex(command, command->rmatch[1]);
302
303
304 if (line && !regexec0(rm, line, len, 0, 0, 0)) miss = 1;
305 }
306 } else if (lm > 0 && lm < TT.count) command->hit = 0;
307
308
309 } else {
310 if (!(lm = *command->lmatch)) {
311 void *rm = get_regex(command, *command->rmatch);
312
313 if (line && !regexec0(rm, line, len, 0, 0, 0)) command->hit++;
314 } else if (lm == TT.count || (lm == -1 && !pline)) command->hit++;
315
316 if (!command->lmatch[1] && !command->rmatch[1]) miss = 1;
317 }
318
319
320 lm = !(command->hit ^ command->not);
321
322
323 if (miss || command->lmatch[1] == TT.count) command->hit = 0;
324
325 if (lm) {
326
327 if (c == '{') {
328 int curly = 1;
329
330 while (curly) {
331 command = command->next;
332 if (command->c == '{') curly++;
333 if (command->c == '}') curly--;
334 }
335 }
336 command = command->next;
337 continue;
338 }
339 }
340
341
342 if (!line) {
343 command = command->next;
344 continue;
345 }
346
347
348
349 if (c=='a' || c=='r') {
350 struct append *a = xzalloc(sizeof(struct append));
351 if (command->arg1) a->str = command->arg1+(char *)command;
352 a->file = c=='r';
353 dlist_add_nomalloc((void *)&append, (void *)a);
354 } else if (c=='b' || c=='t' || c=='T') {
355 int t = tea;
356
357 if (c != 'b') tea = 0;
358 if (c=='b' || t^(c=='T')) {
359 if (!command->arg1) break;
360 str = command->arg1+(char *)command;
361 for (command = (void *)TT.pattern; command; command = command->next)
362 if (command->c == ':' && !strcmp(command->arg1+(char *)command, str))
363 break;
364 if (!command) error_exit("no :%s", str);
365 }
366 } else if (c=='c') {
367 str = command->arg1+(char *)command;
368 if (!command->hit) emit(str, strlen(str), 1);
369 free(line);
370 line = 0;
371 continue;
372 } else if (c=='d') {
373 free(line);
374 line = 0;
375 continue;
376 } else if (c=='D') {
377
378 str = line;
379 while ((str-line)<len) if (*(str++) == '\n') break;
380 len -= str - line;
381 memmove(line, str, len);
382
383
384
385 if (!len) {
386 free(line);
387 line = 0;
388 } else {
389 line[len] = 0;
390 command = (void *)TT.pattern;
391 }
392 continue;
393 } else if (c=='g') {
394 free(line);
395 line = xstrdup(TT.remember);
396 len = TT.rememberlen;
397 } else if (c=='G') {
398 line = xrealloc(line, len+TT.rememberlen+2);
399 line[len++] = '\n';
400 memcpy(line+len, TT.remember, TT.rememberlen);
401 line[len += TT.rememberlen] = 0;
402 } else if (c=='h') {
403 free(TT.remember);
404 TT.remember = xstrdup(line);
405 TT.rememberlen = len;
406 } else if (c=='H') {
407 TT.remember = xrealloc(TT.remember, TT.rememberlen+len+2);
408 TT.remember[TT.rememberlen++] = '\n';
409 memcpy(TT.remember+TT.rememberlen, line, len);
410 TT.remember[TT.rememberlen += len] = 0;
411 } else if (c=='i') {
412 str = command->arg1+(char *)command;
413 emit(str, strlen(str), 1);
414 } else if (c=='l') {
415 int i, x, off;
416
417 if (!TT.xx) {
418 terminal_size(&TT.xx, 0);
419 if (!TT.xx) TT.xx = 80;
420 if (TT.xx > sizeof(toybuf)-10) TT.xx = sizeof(toybuf)-10;
421 if (TT.xx > 4) TT.xx -= 4;
422 }
423
424 for (i = off = 0; i<len; i++) {
425 if (off >= TT.xx) {
426 toybuf[off++] = '\\';
427 emit(toybuf, off, 1);
428 off = 0;
429 }
430 x = stridx("\\\a\b\f\r\t\v", line[i]);
431 if (x != -1) {
432 toybuf[off++] = '\\';
433 toybuf[off++] = "\\abfrtv"[x];
434 } else if (line[i] >= ' ') toybuf[off++] = line[i];
435 else off += sprintf(toybuf+off, "\\%03o", line[i]);
436 }
437 toybuf[off++] = '$';
438 emit(toybuf, off, 1);
439 } else if (c=='n') {
440 TT.restart = command->next+1;
441
442 break;
443 } else if (c=='N') {
444
445
446 if (pline) {
447 TT.restart = command->next+1;
448 extend_string(&line, TT.nextline, len, -TT.nextlen);
449 free(TT.nextline);
450 TT.nextline = line;
451 TT.nextlen += len + 1;
452 line = 0;
453 }
454
455
456 goto done;
457 } else if (c=='p' || c=='P') {
458 char *l = (c=='P') ? strchr(line, '\n') : 0;
459
460 if (emit(line, l ? l-line : len, eol)) break;
461 } else if (c=='q') {
462 if (pline) *pline = (void *)1;
463 free(TT.nextline);
464 TT.nextline = 0;
465 TT.nextlen = 0;
466
467 break;
468 } else if (c=='s') {
469 char *rline = line, *new = command->arg2 + (char *)command, *swap, *rswap;
470 regmatch_t *match = (void *)toybuf;
471 regex_t *reg = get_regex(command, command->arg1);
472 int mflags = 0, count = 0, zmatch = 1, rlen = len, mlen, off, newlen;
473
474
475 while (!regexec0(reg, rline, rlen, 10, match, mflags)) {
476 mflags = REG_NOTBOL;
477
478
479 mlen = match[0].rm_eo-match[0].rm_so;
480 if (!mlen && !zmatch) {
481 if (!rlen--) break;
482 rline++;
483 zmatch++;
484 continue;
485 } else zmatch = 0;
486
487
488 off = command->sflags>>3;
489 if (off && off != ++count) {
490 rline += match[0].rm_eo;
491 rlen -= match[0].rm_eo;
492
493 continue;
494 }
495
496
497 if (match[0].rm_eo > INT_MAX) perror_exit(0);
498
499
500 for (off = newlen = 0; new[off]; off++) {
501 int cc = -1;
502
503 if (new[off] == '&') cc = 0;
504 else if (new[off] == '\\') cc = new[++off] - '0';
505 if (cc < 0 || cc > 9) {
506 newlen++;
507 continue;
508 }
509 newlen += match[cc].rm_eo-match[cc].rm_so;
510 }
511
512
513
514 len += newlen-mlen;
515 swap = xmalloc(len+1);
516 rswap = swap+(rline-line)+match[0].rm_so;
517 memcpy(swap, line, (rline-line)+match[0].rm_so);
518 memcpy(rswap+newlen, rline+match[0].rm_eo, (rlen -= match[0].rm_eo)+1);
519
520
521 for (off = mlen = 0; new[off]; off++) {
522 int cc = 0, ll;
523
524 if (new[off] == '\\') {
525 cc = new[++off] - '0';
526 if (cc<0 || cc>9) {
527 if (!(rswap[mlen++] = unescape(new[off])))
528 rswap[mlen-1] = new[off];
529
530 continue;
531 } else if (match[cc].rm_so == -1) error_exit("no s//\\%d/", cc);
532 } else if (new[off] != '&') {
533 rswap[mlen++] = new[off];
534
535 continue;
536 }
537
538 ll = match[cc].rm_eo-match[cc].rm_so;
539 memcpy(rswap+mlen, rline+match[cc].rm_so, ll);
540 mlen += ll;
541 }
542
543 rline = rswap+newlen;
544 free(line);
545 line = swap;
546
547
548 if (!(command->sflags & 2)) break;
549 }
550
551 if (mflags) {
552
553 if (command->sflags & 4) emit(line, len, eol);
554
555 tea = 1;
556 if (command->w) goto writenow;
557 }
558 } else if (c=='w') {
559 int fd, noeol;
560 char *name;
561
562writenow:
563
564 fd = TT.fdout;
565 noeol = TT.noeol;
566
567
568 name = command->w + (char *)command;
569 memcpy(&TT.fdout, name, 4);
570 name += 4;
571 TT.noeol = *(name++);
572
573
574 if (emit(line, len, eol))
575 perror_exit("w '%s'", command->arg1+(char *)command);
576 *(--name) = TT.noeol;
577 TT.noeol = noeol;
578 TT.fdout = fd;
579 } else if (c=='x') {
580 long swap = TT.rememberlen;
581
582 str = TT.remember;
583 TT.remember = line;
584 line = str;
585 TT.rememberlen = len;
586 len = swap;
587 } else if (c=='y') {
588 char *from, *to = (char *)command;
589 int i, j;
590
591 from = to+command->arg1;
592 to += command->arg2;
593
594 for (i = 0; i < len; i++) {
595 j = stridx(from, line[i]);
596 if (j != -1) line[i] = to[j];
597 }
598 } else if (c=='=') {
599 sprintf(toybuf, "%ld", TT.count);
600 emit(toybuf, strlen(toybuf), 1);
601 }
602
603 command = command->next;
604 }
605
606 if (line && !FLAG(n)) emit(line, len, eol);
607
608done:
609 if (dlist_terminate(append)) while (append) {
610 struct append *a = append->next;
611
612 if (append->file) {
613 int fd = open(append->str, O_RDONLY);
614
615
616 if (fd != -1) {
617 if (TT.noeol) xwrite(TT.fdout, "\n", 1);
618 TT.noeol = 0;
619 xsendfile(fd, TT.fdout);
620 close(fd);
621 }
622 } else if (append->str) emit(append->str, strlen(append->str), 1);
623 else emit(line, 0, 0);
624 free(append);
625 append = a;
626 }
627 free(line);
628}
629
630
631static void do_sed_file(int fd, char *name)
632{
633 char *tmp;
634
635 if (FLAG(i)) {
636 struct sedcmd *command;
637
638 if (!fd) return error_msg("-i on stdin");
639 TT.fdout = copy_tempfile(fd, name, &tmp);
640 TT.count = 0;
641 for (command = (void *)TT.pattern; command; command = command->next)
642 command->hit = 0;
643 }
644 do_lines(fd, TT.delim, sed_line);
645 if (FLAG(i)) {
646 if (TT.i && *TT.i) {
647 char *s = xmprintf("%s%s", name, TT.i);
648
649 xrename(name, s);
650 free(s);
651 }
652 replace_tempfile(-1, TT.fdout, &tmp);
653 TT.fdout = 1;
654 TT.nextline = 0;
655 TT.nextlen = TT.noeol = 0;
656 }
657}
658
659
660
661
662
// Copy the delimited string starting at *pstr into a newly allocated
// buffer, resolving escapes, and advance *pstr past the closing delimiter.
//
// If delim is NULL or points to a 0 byte, the delimiter is read from the
// start of the string (a leading backslash selects the character after it)
// and stored through delim when non-NULL; otherwise *delim is the delimiter
// and the string starts immediately. A delimiter inside a [bracket
// expression] does not end the string.
//
// Returns the malloc()ed result (caller frees), or 0 if the string is
// unterminated or malformed, in which case *pstr is left unchanged.
static char *unescape_delimited_string(char **pstr, char *delim)
{
  char *to, *from, *out, mode = 0, d;

  // Work out the delimiter.
  from = *pstr;
  if (!delim || !*delim) {
    if (!(d = *(from++))) return 0;
    if (d == '\\') d = *(from++);
    if (!d || d == '\\') return 0;
    if (delim) *delim = d;
  } else d = *delim;

  // The output is never longer than the input.
  to = out = xmalloc(strlen(*pstr)+1);

  // mode is 0 outside brackets, ']' inside a bracket expression, or one of
  // . = : inside a [. .] [= =] [: :] element.
  while (mode || *from != d) {
    if (!*from) goto fail;

    if (*from == '[') {
      if (!mode) {
        mode = ']';
        // A leading - or ] is literal: copy the '[' here so the literal is
        // copied below without being interpreted.
        if (from[1]=='-' || from[1]==']') *(to++) = *(from++);
      // strchr() also matches the terminator, so check for it first.
      } else if (mode == ']' && from[1] && strchr(".=:", from[1])) {
        *(to++) = *(from++);
        mode = *from;
      }
    } else if (*from == mode) {
      if (mode == ']') mode = 0;
      else {
        *(to++) = *(from++);
        mode = ']';
      }

    // Drop the "-X" of a one-character range X-X inside brackets.
    } else if (mode && *from == '-' && from[-1] == from[1]) {
      from+=2;
      continue;
    } else if (*from == '\\') {
      if (!from[1]) goto fail;

      // An escaped delimiter becomes the bare delimiter; check this before
      // other escapes.
      if (from[1] == d) from++;
      // Keep \\ as two characters.
      else if (from[1]=='\\') *(to++) = *(from++);
      else {
        char c = unescape(from[1]);

        if (c) {
          *(to++) = c;
          from+=2;
          continue;
        // Unknown escape: keep the backslash outside brackets, drop it
        // inside them.
        } else if (!mode) *(to++) = *(from++);
      }
    }

    // The branches above may have advanced to the end of the input.
    if (!*from) goto fail;
    *(to++) = *(from++);
  }
  *to = 0;
  *pstr = from+1;

  return out;

fail:
  free(out);

  return 0;
}
723
724
725
726static void parse_pattern(char **pline, long len)
727{
728 struct sedcmd *command = (void *)TT.pattern;
729 char *line, *reg, c, *errstart;
730 int i;
731
732 line = errstart = pline ? *pline : "";
733 if (len && line[len-1]=='\n') line[--len] = 0;
734
735
736
737
738
739 if (command && command->prev->hit) {
740
741 TT.pattern = TT.pattern->prev;
742 command = dlist_pop(&TT.pattern);
743 c = command->c;
744 reg = (char *)command;
745 reg += command->arg1 + strlen(reg + command->arg1);
746
747
748
749
750 if (command->hit < 256) goto resume_s;
751 else goto resume_a;
752 }
753
754
755
756 command = 0;
757 for (;;) {
758 if (command) dlist_add_nomalloc(&TT.pattern, (void *)command);
759
760
761 for (;;) {
762 while (isspace(*line) || *line == ';') line++;
763 if (*line == '#') while (*line && *line != '\n') line++;
764 else break;
765 }
766 if (!*line) return;
767
768
769
770
771 errstart = line;
772 memset(toybuf, 0, sizeof(struct sedcmd));
773 command = (void *)toybuf;
774 reg = toybuf + sizeof(struct sedcmd);
775
776
777 for (i = 0; i < 2; i++) {
778 if (*line == ',') line++;
779 else if (i) break;
780
781 if (isdigit(*line)) command->lmatch[i] = strtol(line, &line, 0);
782 else if (*line == '$') {
783 command->lmatch[i] = -1;
784 line++;
785 } else if (*line == '/' || *line == '\\') {
786 char *s = line;
787
788 if (!(s = unescape_delimited_string(&line, 0))) goto error;
789 if (!*s) command->rmatch[i] = 0;
790 else {
791 xregcomp((void *)reg, s, REG_EXTENDED*!!FLAG(r));
792 command->rmatch[i] = reg-toybuf;
793 reg += sizeof(regex_t);
794 }
795 free(s);
796 } else break;
797 }
798
799 while (isspace(*line)) line++;
800 if (!*line) break;
801
802 while (*line == '!') {
803 command->not = 1;
804 line++;
805 }
806 while (isspace(*line)) line++;
807
808 c = command->c = *(line++);
809 if (strchr("}:", c) && i) break;
810 if (strchr("aiqr=", c) && i>1) break;
811
812
813 command = xmemdup(toybuf, reg-toybuf);
814 reg = (reg-toybuf) + (char *)command;
815
816
817 if (c == '{') TT.nextlen++;
818 else if (c == '}') {
819 if (!TT.nextlen--) break;
820 } else if (c == 's') {
821 char *end, delim = 0;
822
823
824
825
826
827
828
829
830 command->arg2 = reg - (char *)command;
831 if (!(TT.remember = unescape_delimited_string(&line, &delim)))
832 goto error;
833
834 reg += sizeof(regex_t);
835 command->arg1 = reg-(char *)command;
836 command->hit = delim;
837resume_s:
838
839
840 end = line;
841 while (*end != command->hit) {
842 if (!*end) goto error;
843 if (*end++ == '\\') {
844 if (!*end || *end == '\n') {
845 end[-1] = '\n';
846 break;
847 }
848 end++;
849 }
850 }
851
852 reg = extend_string((void *)&command, line, reg-(char *)command,end-line);
853 line = end;
854
855 if (*line == command->hit) command->hit = 0;
856 else {
857 if (!*line) continue;
858 reg--;
859 line++;
860 goto resume_s;
861 }
862
863
864 i = command->arg1;
865 command->arg1 = command->arg2;
866 command->arg2 = i;
867
868
869 for (line++; *line; line++) {
870 long l;
871
872 if (isspace(*line) && *line != '\n') continue;
873
874 if (0 <= (l = stridx("igp", *line))) command->sflags |= 1<<l;
875 else if (!(command->sflags>>3) && 0<(l = strtol(line, &line, 10))) {
876 command->sflags |= l << 3;
877 line--;
878 } else break;
879 }
880
881
882
883 if (!*TT.remember) command->arg1 = 0;
884 else xregcomp((void *)(command->arg1 + (char *)command), TT.remember,
885 (REG_EXTENDED*!!FLAG(r))|((command->sflags&1)*REG_ICASE));
886 free(TT.remember);
887 TT.remember = 0;
888 if (*line == 'w') {
889 line++;
890 goto writenow;
891 }
892 } else if (c == 'w') {
893 int fd, delim;
894 char *cc;
895
896
897
898
899
900
901writenow:
902 while (isspace(*line)) line++;
903 if (!*line) goto error;
904 for (cc = line; *cc; cc++) if (*cc == '\\' && cc[1] == ';') break;
905 delim = *cc;
906 *cc = 0;
907 fd = xcreate(line, O_WRONLY|O_CREAT|O_TRUNC, 0644);
908 *cc = delim;
909
910 command->w = reg - (char *)command;
911 command = xrealloc(command, command->w+(cc-line)+6);
912 reg = command->w + (char *)command;
913
914 memcpy(reg, &fd, 4);
915 reg += 4;
916 *(reg++) = 0;
917 memcpy(reg, line, delim);
918 reg += delim;
919 *(reg++) = 0;
920
921 line = cc;
922 if (delim) line += 2;
923 } else if (c == 'y') {
924 char *s, delim = 0;
925 int len;
926
927 if (!(s = unescape_delimited_string(&line, &delim))) goto error;
928 command->arg1 = reg-(char *)command;
929 len = strlen(s);
930 reg = extend_string((void *)&command, s, reg-(char *)command, len);
931 free(s);
932 command->arg2 = reg-(char *)command;
933 if (!(s = unescape_delimited_string(&line, &delim))) goto error;
934 if (len != strlen(s)) goto error;
935 reg = extend_string((void *)&command, s, reg-(char*)command, len);
936 free(s);
937 } else if (strchr("abcirtTw:", c)) {
938 int end;
939
940
941 while (isspace(*line) && *line != '\n') line++;
942
943
944
945resume_a:
946 command->hit = 0;
947
948
949 if (!(end = strcspn(line, strchr(":btT", c) ? "}; \t\r\n\v\f" : "\n"))) {
950
951 if (strchr("btT", c)) continue;
952 else if (!command->arg1) break;
953 }
954
955
956
957
958 if (!command->arg1) command->arg1 = reg - (char*)command;
959 else if (*(command->arg1+(char *)command)) *(reg++) = '\n';
960 else if (!pline) {
961 command->arg1 = 0;
962 continue;
963 }
964 reg = extend_string((void *)&command, line, reg - (char *)command, end);
965
966
967 if (strchr("aci", c)) {
968 reg -= end+1;
969 for (i = end; i; i--) {
970 if ((*reg++ = *line++)=='\\') {
971
972
973
974 if (!--i) {
975 *--reg = 0;
976 if (*line) {
977 line++;
978 goto resume_a;
979 }
980 command->hit = 256;
981 break;
982 }
983 if (!(reg[-1] = unescape(*line))) reg[-1] = *line;
984 line++;
985 }
986 }
987 *reg = 0;
988 } else line += end;
989
990
991 } else if (!strchr("{dDgGhHlnNpPqx=", c)) break;
992 }
993
994error:
995 error_exit("bad pattern '%s'@%ld (%c)", errstart, line-errstart+1L, *line);
996}
997
998void sed_main(void)
999{
1000 struct arg_list *al;
1001 char **args = toys.optargs;
1002
1003 if (!FLAG(z)) TT.delim = '\n';
1004
1005
1006
1007
1008 if (FLAG(version)) {
1009 xprintf("This is not GNU sed version 9.0\n");
1010 return;
1011 }
1012
1013
1014 if (FLAG(help)) help_exit(0);
1015
1016
1017
1018
1019 if (!TT.e && !TT.f) {
1020 if (!*toys.optargs) error_exit("no pattern");
1021 (TT.e = xzalloc(sizeof(struct arg_list)))->arg = *(args++);
1022 }
1023
1024
1025
1026
1027 for (al = TT.e; al; al = al->next) parse_pattern(&al->arg, strlen(al->arg));
1028 parse_pattern(0, 0);
1029 for (al = TT.f; al; al = al->next)
1030 do_lines(xopenro(al->arg), TT.delim, parse_pattern);
1031 dlist_terminate(TT.pattern);
1032 if (TT.nextlen) error_exit("no }");
1033
1034 TT.fdout = 1;
1035 TT.remember = xstrdup("");
1036
1037
1038 loopfiles_rw(args, O_RDONLY|WARN_ONLY, 0, do_sed_file);
1039
1040
1041 if (!FLAG(i)) {
1042 toys.optflags |= FLAG_i;
1043 sed_line(0, 0);
1044 }
1045
1046
1047}
1048