1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85#include "libbb.h"
86#include "common_bufsiz.h"
87#include "xregex.h"
88
/* Debug tracing hook.
 * By default dbg() expands to a no-op expression; change the condition
 * below to 0 to forward all dbg() calls to bb_error_msg(). */
#if 1
# define dbg(...) ((void)0)
#else
# define dbg(...) bb_error_msg(__VA_ARGS__)
#endif
94
95
/* Named bits of the option mask returned by option parsing in sed_main().
 * Only the in-place flag (lowest bit) is referenced by name. */
enum {
	OPT_in_place = 0x01,
};
99
100
/* One parsed sed command. Commands are chained into a singly linked list
 * in script order (see G.sed_cmd_head / G.sed_cmd_tail). */
struct sed_cmd_s {
	/* Next command in the script, NULL for the last one */
	struct sed_cmd_s *next;

	/* Address selection */
	regex_t *beg_match;     /* regex of the first address, or NULL */
	regex_t *end_match;     /* regex of the second address, or NULL */
	regex_t *sub_match;     /* regex of an 's' command, NULL if it was empty */
	int beg_line;           /* first address as line number; -1 stands for '$' */
	int beg_line_orig;      /* beg_line as parsed (beg_line is modified while running) */
	int end_line;           /* second address; -1 stands for '$', <= -2 encodes ",+N" */
	int end_line_orig;      /* end_line as parsed (end_line is modified while running) */

	FILE *sw_file;          /* output stream of a 'w' command or of the 's///w' flag */
	char *string;           /* command argument: text, file name, label, replacement... */

	unsigned which_match;   /* 's': replace only this match number; 0 means every match */

	/* Bit flags */
	unsigned invert:1;      /* '!' followed the address */
	unsigned in_match:1;    /* currently inside a matching address range */
	unsigned sub_p:1;       /* 's///p' flag was given */

	char sw_last_char;      /* last character written to sw_file */

	/* The command letter itself */
	char cmd;
};
typedef struct sed_cmd_s sed_cmd_t;
129
/* Separator characters: skipped between commands by add_cmd() and used by
 * parse_cmd_args() to find the end of a label argument. */
static const char semicolon_whitespace[] ALIGN1 = "; \n\r\t\v";
131
132struct globals {
133
134 int be_quiet, regex_type;
135
136 FILE *nonstdout;
137 char *outname, *hold_space;
138 smallint exitcode;
139
140
141 int current_input_file, last_input_file;
142 char **input_file_list;
143 FILE *current_fp;
144
145 regmatch_t regmatch[10];
146 regex_t *previous_regex_ptr;
147
148
149 sed_cmd_t *sed_cmd_head, **sed_cmd_tail;
150
151
152 llist_t *append_head;
153
154 char *add_cmd_line;
155
156 struct pipeline {
157 char *buf;
158 int idx;
159 int len;
160 } pipeline;
161} FIX_ALIASING;
/* The applet state lives in the shared bb_common_bufsiz1 buffer */
#define G (*(struct globals*)bb_common_bufsiz1)
/* Prepare G for use: set up the common buffer, verify (BUILD_BUG_ON,
 * presumably a compile-time check) that struct globals fits into it, and
 * point the command-list tail at the still empty head so that the first
 * command linked through the tail becomes the head. */
#define INIT_G() do { \
	setup_common_bufsiz(); \
	BUILD_BUG_ON(sizeof(G) > COMMON_BUFSIZE); \
	G.sed_cmd_tail = &G.sed_cmd_head; \
} while (0)
168
169
#if ENABLE_FEATURE_CLEAN_UP
/* Release what G owns: the queued append text, every command node together
 * with its argument string and 'w' output stream, the hold space, and the
 * input stream that is still open. Compiled regexes referenced by the
 * command nodes are not released here. */
static void sed_free_and_close_stuff(void)
{
	sed_cmd_t *node;
	sed_cmd_t *following;

	llist_free(G.append_head, free);

	for (node = G.sed_cmd_head; node != NULL; node = following) {
		/* Fetch the successor before the node itself is freed */
		following = node->next;

		if (node->sw_file != NULL)
			fclose(node->sw_file);

		free(node->string);
		free(node);
	}

	free(G.hold_space);

	if (G.current_fp != NULL)
		fclose(G.current_fp);
}
#else
void sed_free_and_close_stuff(void);
#endif
213
214
215
216static void cleanup_outname(void)
217{
218 if (G.outname) unlink(G.outname);
219}
220
221
222
/* Copy at most len characters of string into dest while processing
 * backslash escapes, and NUL-terminate dest.
 *
 *  - to != 0: every "\<from>" pair becomes the single character 'to';
 *    any other "\x" pair is copied unchanged (both characters).
 *  - to == 0: every "\x" pair becomes plain 'x' (backslash is dropped).
 *
 * len == -1 means "use strlen(string)". Copying stops early when a NUL is
 * about to be stored. dest may be the same buffer as string: the output
 * never gets ahead of the input.
 *
 * Returns the length of the resulting string (terminator not counted).
 */
static unsigned parse_escapes(char *dest, const char *string, int len, char from, char to)
{
	char *out = dest;
	const char *in = string;
	const char *end;

	if (len == -1)
		len = strlen(string);
	end = string + len;

	while (in < end) {
		char c;

		if (*in == '\\') {
			char following = in[1];

			if (to == '\0' || following == from) {
				/* Collapse the pair into a single character */
				char repl = (to != '\0') ? to : following;

				*out = repl;
				if (repl == '\0')
					return out - dest;
				in += 2;
				out++;
				continue;
			}
			/* Not our escape: keep the backslash, then copy the
			 * character after it verbatim below */
			in++;
			*out++ = '\\';
		}
		c = *in++;
		*out = c;
		if (c == '\0')
			return out - dest;
		out++;
	}
	*out = '\0';
	return out - dest;
}
251
/* Return a freshly allocated copy of the first len characters of string
 * in which the escapes \n, \t and \r have been replaced by the characters
 * they denote. The caller owns (and frees) the result. */
static char *copy_parsing_escapes(const char *string, int len)
{
	/* Pairs of (replacement character, escape letter) */
	const char *pairs = "\nn\tt\rr";
	const char *src = string;
	char *dest = xmalloc(len + 1);
	int i;

	/* The first pass copies string into dest; the remaining passes
	 * rewrite dest in place, each one using the length left by the
	 * previous pass. */
	for (i = 0; pairs[i] != '\0'; i += 2) {
		len = parse_escapes(dest, src, len, pairs[i + 1], pairs[i]);
		src = dest;
	}
	return dest;
}
265
266
267
268
269
270
271
272
/* Find the first occurrence of the delimiter in str that is neither
 * preceded by a backslash nor located inside a [bracket expression].
 *
 * delimiter: the delimiter character as a non-negative value. Passing the
 *            negated value disables bracket tracking (used for the
 *            replacement part of s///, where '[' is not special).
 *
 * Returns the index of the delimiter in str. Dies with "unmatched" if the
 * string ends first.
 */
static int index_of_next_unescaped_regexp_delim(int delimiter, const char *str)
{
	int bracket = -1;	/* -1: outside [...]; >= 0: index of the opening '['; -2: tracking off */
	int escaped = 0;
	int idx = 0;
	/* Must be unsigned: the delimiter arrives as a value in 0..255, and a
	 * plain (possibly signed) char holding a byte >= 0x80 would never
	 * compare equal to it. */
	unsigned char ch;

	if (delimiter < 0) {
		bracket--;
		delimiter = -delimiter;
	}

	for (; (ch = str[idx]) != '\0'; idx++) {
		if (bracket >= 0) {
			/* Inside brackets. A ']' directly after '[' or after "[^"
			 * is a literal member of the set, not the closing bracket. */
			if (ch == ']'
			 && !(bracket == idx - 1 || (bracket == idx - 2 && str[idx - 1] == '^'))
			) {
				bracket = -1;
			}
		} else if (escaped)
			escaped = 0;
		else if (ch == '\\')
			escaped = 1;
		else if (bracket == -1 && ch == '[')
			bracket = idx;
		else if (ch == delimiter)
			return idx;
	}

	/* If we make it to here, we've hit the end of the string */
	bb_error_msg_and_die("unmatched '%c'", delimiter);
}
305
306
307
308
/* Split "<delim>match<delim>replace<delim>..." (the argument of an 's' or
 * 'y' command) into its two parts. The delimiter is whatever character
 * cmdstr starts with.
 *
 * *match and *replace receive newly allocated strings with \n, \t, \r
 * already expanded; the caller frees them.
 *
 * Returns the index within cmdstr of the final (third) delimiter.
 */
static int parse_regex_delim(const char *cmdstr, char **match, char **replace)
{
	const char *part;
	unsigned char delim;
	int part_len;

	/* The delimiter is mandatory */
	if (cmdstr[0] == '\0')
		bb_error_msg_and_die("bad format in substitution expression");
	delim = cmdstr[0];
	part = cmdstr + 1;

	/* Pattern: everything up to the next unescaped delimiter */
	part_len = index_of_next_unescaped_regexp_delim(delim, part);
	*match = copy_parsing_escapes(part, part_len);

	/* Replacement: negated delimiter turns bracket tracking off */
	part += part_len + 1;
	part_len = index_of_next_unescaped_regexp_delim(-(int)delim, part);
	*replace = copy_parsing_escapes(part, part_len);

	return (part - cmdstr) + part_len;
}
332
333
334
335
336static int get_address(const char *my_str, int *linenum, regex_t ** regex)
337{
338 const char *pos = my_str;
339
340 if (isdigit(*my_str)) {
341 *linenum = strtol(my_str, (char**)&pos, 10);
342
343 } else if (*my_str == '$') {
344 *linenum = -1;
345 pos++;
346 } else if (*my_str == '/' || *my_str == '\\') {
347 int next;
348 char delimiter;
349 char *temp;
350
351 delimiter = '/';
352 if (*my_str == '\\')
353 delimiter = *++pos;
354 next = index_of_next_unescaped_regexp_delim(delimiter, ++pos);
355 if (next != 0) {
356 temp = copy_parsing_escapes(pos, next);
357 G.previous_regex_ptr = *regex = xzalloc(sizeof(regex_t));
358 xregcomp(*regex, temp, G.regex_type);
359 free(temp);
360 } else {
361 *regex = G.previous_regex_ptr;
362 if (!G.previous_regex_ptr)
363 bb_error_msg_and_die("no previous regexp");
364 }
365
366 pos += (next+1);
367 }
368 return pos - my_str;
369}
370
371
/* Extract the file name argument of an 'r'/'w' command or 's///w' flag.
 *
 * Leading whitespace is skipped; the name extends to the end of the line.
 * A newline inside the script text can only come from add_cmd() gluing a
 * backslash-terminated line to its successor, so when the name is ended
 * by a newline the backslash that add_cmd() removed is put back as the
 * last character of the name.
 *
 * *retval receives a newly allocated string owned by the caller.
 * Returns the offset in filecmdstr of the character that ended the name
 * (the newline or the terminating NUL). Dies on an empty name.
 */
static int parse_file_cmd(const char *filecmdstr, char **retval)
{
	int start = 0, idx, glued;

	/* Skip whitespace, then take everything up to the end of line */
	while (isspace((unsigned char)filecmdstr[start]))
		start++;
	idx = start;
	while (filecmdstr[idx] && filecmdstr[idx] != '\n')
		idx++;

	glued = (filecmdstr[idx] == '\n');
	if (idx == start)
		bb_error_msg_and_die("empty filename");

	/* Copy the name, plus the newline slot if the lines were glued */
	*retval = xstrndup(filecmdstr + start, idx - start + glued);
	if (glued) {
		/* Index relative to the copy (which begins at 'start'), not
		 * to filecmdstr: using idx here would write past the end of
		 * the copy whenever leading whitespace was skipped. */
		(*retval)[idx - start] = '\\';
	}

	return idx;
}
394
395static int parse_subst_cmd(sed_cmd_t *sed_cmd, const char *substr)
396{
397 int cflags = G.regex_type;
398 char *match;
399 int idx;
400
401
402
403
404
405
406
407 idx = parse_regex_delim(substr, &match, &sed_cmd->string);
408
409
410
411
412
413
414
415
416 sed_cmd->which_match = 1;
417 dbg("s flags:'%s'", substr + idx + 1);
418 while (substr[++idx]) {
419 dbg("s flag:'%c'", substr[idx]);
420
421 if (isdigit(substr[idx])) {
422 if (match[0] != '^') {
423
424 const char *pos = substr + idx;
425
426 sed_cmd->which_match = (unsigned)strtol(substr+idx, (char**) &pos, 10);
427 idx = pos - substr - 1;
428 }
429 continue;
430 }
431
432 if (isspace(substr[idx]))
433 continue;
434
435 switch (substr[idx]) {
436
437 case 'g':
438 if (match[0] != '^')
439 sed_cmd->which_match = 0;
440 break;
441
442 case 'p':
443 sed_cmd->sub_p = 1;
444 break;
445
446 case 'w':
447 {
448 char *fname;
449 idx += parse_file_cmd( substr+idx+1, &fname);
450 sed_cmd->sw_file = xfopen_for_write(fname);
451 sed_cmd->sw_last_char = '\n';
452 free(fname);
453 break;
454 }
455
456 case 'i':
457 case 'I':
458 cflags |= REG_ICASE;
459 break;
460
461 case '#':
462
463 idx += strlen(substr + idx);
464
465
466 case ';':
467 case '}':
468 goto out;
469 default:
470 dbg("s bad flags:'%s'", substr + idx);
471 bb_error_msg_and_die("bad option in substitution expression");
472 }
473 }
474 out:
475
476 if (*match != '\0') {
477
478 sed_cmd->sub_match = xzalloc(sizeof(regex_t));
479 dbg("xregcomp('%s',%x)", match, cflags);
480 xregcomp(sed_cmd->sub_match, match, cflags);
481 dbg("regcomp ok");
482 }
483 free(match);
484
485 return idx;
486}
487
488
489
490
491static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
492{
493 static const char cmd_letters[] ALIGN1 = "saicrw:btTydDgGhHlnNpPqx={}";
494 enum {
495 IDX_s = 0,
496 IDX_a,
497 IDX_i,
498 IDX_c,
499 IDX_r,
500 IDX_w,
501 IDX_colon,
502 IDX_b,
503 IDX_t,
504 IDX_T,
505 IDX_y,
506 IDX_d,
507 IDX_D,
508 IDX_g,
509 IDX_G,
510 IDX_h,
511 IDX_H,
512 IDX_l,
513 IDX_n,
514 IDX_N,
515 IDX_p,
516 IDX_P,
517 IDX_q,
518 IDX_x,
519 IDX_equal,
520 IDX_lbrace,
521 IDX_rbrace,
522 IDX_nul
523 };
524 unsigned idx;
525
526 BUILD_BUG_ON(sizeof(cmd_letters)-1 != IDX_nul);
527
528 idx = strchrnul(cmd_letters, sed_cmd->cmd) - cmd_letters;
529
530
531 if (idx == IDX_s) {
532 cmdstr += parse_subst_cmd(sed_cmd, cmdstr);
533 }
534
535 else if (idx <= IDX_c) {
536 unsigned len;
537
538 if (idx < IDX_c) {
539 if (sed_cmd->end_line || sed_cmd->end_match)
540 bb_error_msg_and_die("command '%c' uses only one address", sed_cmd->cmd);
541 }
542 for (;;) {
543 if (*cmdstr == '\n' || *cmdstr == '\\') {
544 cmdstr++;
545 break;
546 }
547 if (!isspace(*cmdstr))
548 break;
549 cmdstr++;
550 }
551 len = strlen(cmdstr);
552 sed_cmd->string = copy_parsing_escapes(cmdstr, len);
553 cmdstr += len;
554
555 parse_escapes(sed_cmd->string, sed_cmd->string, -1, '\0', '\0');
556 }
557
558 else if (idx <= IDX_w) {
559 if (idx < IDX_w) {
560 if (sed_cmd->end_line || sed_cmd->end_match)
561 bb_error_msg_and_die("command '%c' uses only one address", sed_cmd->cmd);
562 }
563 cmdstr += parse_file_cmd( cmdstr, &sed_cmd->string);
564 if (sed_cmd->cmd == 'w') {
565 sed_cmd->sw_file = xfopen_for_write(sed_cmd->string);
566 sed_cmd->sw_last_char = '\n';
567 }
568 }
569
570 else if (idx <= IDX_T) {
571 int length;
572
573 cmdstr = skip_whitespace(cmdstr);
574 length = strcspn(cmdstr, semicolon_whitespace);
575 if (length) {
576 sed_cmd->string = xstrndup(cmdstr, length);
577 cmdstr += length;
578 }
579 }
580
581 else if (idx == IDX_y) {
582 char *match, *replace;
583 int i = cmdstr[0];
584
585 cmdstr += parse_regex_delim(cmdstr, &match, &replace)+1;
586
587 parse_escapes(match, match, -1, i, i);
588 parse_escapes(replace, replace, -1, i, i);
589
590 sed_cmd->string = xzalloc((strlen(match) + 1) * 2);
591 for (i = 0; match[i] && replace[i]; i++) {
592 sed_cmd->string[i*2] = match[i];
593 sed_cmd->string[i*2+1] = replace[i];
594 }
595 free(match);
596 free(replace);
597 }
598
599
600
601 else if (idx >= IDX_nul) {
602 bb_error_msg_and_die("unsupported command %c", sed_cmd->cmd);
603 }
604
605
606 return cmdstr;
607}
608
609
610
611
612static void add_cmd(const char *cmdstr)
613{
614 sed_cmd_t *sed_cmd;
615 unsigned len, n;
616
617
618 if (G.add_cmd_line) {
619 char *tp = xasprintf("%s\n%s", G.add_cmd_line, cmdstr);
620 free(G.add_cmd_line);
621 cmdstr = G.add_cmd_line = tp;
622 }
623
624
625 n = len = strlen(cmdstr);
626 while (n && cmdstr[n-1] == '\\')
627 n--;
628 if ((len - n) & 1) {
629 if (!G.add_cmd_line)
630 G.add_cmd_line = xstrdup(cmdstr);
631 G.add_cmd_line[len-1] = '\0';
632 return;
633 }
634
635
636 while (*cmdstr) {
637
638 cmdstr += strspn(cmdstr, semicolon_whitespace);
639
640
641 if (!*cmdstr) break;
642
643
644 if (*cmdstr == '#') {
645
646 if (cmdstr[1] == 'n')
647 G.be_quiet++;
648 cmdstr = strpbrk(cmdstr, "\n\r");
649 if (!cmdstr) break;
650 continue;
651 }
652
653
654
655
656
657
658
659 sed_cmd = xzalloc(sizeof(sed_cmd_t));
660
661
662 cmdstr += get_address(cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match);
663 sed_cmd->beg_line_orig = sed_cmd->beg_line;
664
665
666 if (*cmdstr == ',') {
667 int idx;
668
669 cmdstr++;
670 if (*cmdstr == '+' && isdigit(cmdstr[1])) {
671
672
673
674
675
676
677
678
679
680 char *end;
681
682 idx = strtol(cmdstr+1, &end, 10);
683 sed_cmd->end_line = -2 - idx;
684 cmdstr = end;
685 } else {
686 idx = get_address(cmdstr, &sed_cmd->end_line, &sed_cmd->end_match);
687 cmdstr += idx;
688 idx--;
689 }
690 if (idx < 0)
691 bb_error_msg_and_die("no address after comma");
692 sed_cmd->end_line_orig = sed_cmd->end_line;
693 }
694
695
696 cmdstr = skip_whitespace(cmdstr);
697
698
699 if (*cmdstr == '!') {
700 sed_cmd->invert = 1;
701 cmdstr++;
702
703
704 cmdstr = skip_whitespace(cmdstr);
705 }
706
707
708 if (!*cmdstr)
709 bb_error_msg_and_die("missing command");
710 sed_cmd->cmd = *cmdstr++;
711 cmdstr = parse_cmd_args(sed_cmd, cmdstr);
712
713
714
715
716
717
718
719
720 *G.sed_cmd_tail = sed_cmd;
721 G.sed_cmd_tail = &sed_cmd->next;
722 }
723
724
725 free(G.add_cmd_line);
726 G.add_cmd_line = NULL;
727}
728
729
730
731#define PIPE_GROW 64
732
733static void pipe_putc(char c)
734{
735 if (G.pipeline.idx == G.pipeline.len) {
736 G.pipeline.buf = xrealloc(G.pipeline.buf,
737 G.pipeline.len + PIPE_GROW);
738 G.pipeline.len += PIPE_GROW;
739 }
740 G.pipeline.buf[G.pipeline.idx++] = c;
741}
742
743static void do_subst_w_backrefs(char *line, char *replace)
744{
745 int i, j;
746
747
748 for (i = 0; replace[i]; i++) {
749
750 if (replace[i] == '\\') {
751 unsigned backref = replace[++i] - '0';
752 if (backref <= 9) {
753
754 if (G.regmatch[backref].rm_so != -1) {
755 j = G.regmatch[backref].rm_so;
756 while (j < G.regmatch[backref].rm_eo)
757 pipe_putc(line[j++]);
758 }
759 continue;
760 }
761
762
763
764
765 pipe_putc(replace[i]);
766 continue;
767 }
768
769 if (replace[i] == '&') {
770 j = G.regmatch[0].rm_so;
771 while (j < G.regmatch[0].rm_eo)
772 pipe_putc(line[j++]);
773 continue;
774 }
775
776 pipe_putc(replace[i]);
777 }
778}
779
/* Execute an 's' command on *line_p.
 *
 * If the regex matches, *line_p is freed and replaced by a newly built
 * string (assembled in G.pipeline). If it does not match, *line_p is left
 * untouched and no buffer is allocated.
 *
 * Returns non-zero if a replacement was actually inserted.
 */
static int do_subst_command(sed_cmd_t *sed_cmd, char **line_p)
{
	char *line = *line_p;
	unsigned match_count = 0;
	bool altered = 0;
	bool prev_match_empty = 1;
	bool tried_at_eol = 0;
	regex_t *current_regex;

	current_regex = sed_cmd->sub_match;
	/* An empty pattern means "reuse the most recently used regex" */
	if (!current_regex) {
		current_regex = G.previous_regex_ptr;
		if (!current_regex)
			bb_error_msg_and_die("no previous regexp");
	}
	G.previous_regex_ptr = current_regex;

	/* Look for the first match; without one there is nothing to do */
	dbg("matching '%s'", line);
	if (REG_NOMATCH == regexec(current_regex, line, 10, G.regmatch, 0)) {
		dbg("no match");
		return 0;
	}
	dbg("match");

	/* Start a fresh output buffer */
	G.pipeline.buf = xmalloc(PIPE_GROW);
	G.pipeline.len = PIPE_GROW;
	G.pipeline.idx = 0;

	/* Process one match per iteration; 'line' always points at the part
	 * of the input that has not been copied to the output yet */
	do {
		int start = G.regmatch[0].rm_so;
		int end = G.regmatch[0].rm_eo;
		int i;

		match_count++;

		/* Not the match number we were asked to replace: copy the
		 * input through the end of this match unchanged */
		if (sed_cmd->which_match
		 && (sed_cmd->which_match != match_count)
		) {
			for (i = 0; i < end; i++)
				pipe_putc(*line++);
			/* Empty match: copy one more character so that the next
			 * search starts further along */
			if (start == end && *line)
				pipe_putc(*line++);
			goto next;
		}

		/* Copy the text in front of the match */
		for (i = 0; i < start; i++)
			pipe_putc(line[i]);

		/* Insert the replacement, except for an empty match located
		 * right where the previous, non-empty match ended */
		if (prev_match_empty || start != 0 || start != end) {
			dbg("inserting replacement at %d in '%s'", start, line);
			do_subst_w_backrefs(line, sed_cmd->string);
			/* Remember that the line was changed */
			altered = 1;
		} else {
			dbg("NOT inserting replacement at %d in '%s'", start, line);
		}

		/* After an empty match, copy one character verbatim before
		 * searching again; at end of line just note that matching
		 * there has been tried */
		prev_match_empty = (start == end);
		if (prev_match_empty) {
			if (!line[end]) {
				tried_at_eol = 1;
			} else {
				pipe_putc(line[end]);
				end++;
			}
		}

		/* Step over the consumed input */
		dbg("line += %d", end);
		line += end;

		/* Stop unless every match is to be replaced */
		if (sed_cmd->which_match != 0)
			break;
 next:
		/* At end of line, allow one single further match attempt */
		if (*line == '\0') {
			if (tried_at_eol)
				break;
			tried_at_eol = 1;
		}

	} while (regexec(current_regex, line, 10, G.regmatch, REG_NOTBOL) != REG_NOMATCH);

	/* Copy the remainder of the input, including the terminating NUL */
	while (1) {
		char c = *line++;
		pipe_putc(c);
		if (c == '\0')
			break;
	}

	/* Hand the new string to the caller in place of the old one */
	free(*line_p);
	*line_p = G.pipeline.buf;
	return altered;
}
896
897
898static sed_cmd_t *branch_to(char *label)
899{
900 sed_cmd_t *sed_cmd;
901
902 for (sed_cmd = G.sed_cmd_head; sed_cmd; sed_cmd = sed_cmd->next) {
903 if (sed_cmd->cmd == ':'
904 && sed_cmd->string
905 && strcmp(sed_cmd->string, label) == 0
906 ) {
907 return sed_cmd;
908 }
909 }
910 bb_error_msg_and_die("can't find label for jump to '%s'", label);
911}
912
/* Queue s at the end of the pending-append list (G.append_head).
 * flush_append() later writes each queued string and frees it, so s must
 * be heap-allocated and ownership passes to the list. */
static void append(char *s)
{
	llist_add_to_end(&G.append_head, s);
}
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
/* Special values of the "last character read" indicator produced by
 * get_next_line() and consumed by puts_maybe_newline(); any other value
 * is the actual line terminator character that was read. */
enum {
	NO_EOL_CHAR = 1,        /* the line was not terminated at all */
	LAST_IS_NUL,            /* (== 2) a NUL terminator was the last byte of the file */
};
936static void puts_maybe_newline(char *s, FILE *file, char *last_puts_char, char last_gets_char)
937{
938 char lpc = *last_puts_char;
939
940
941
942 if (lpc != '\n' && lpc != '\0') {
943 fputc('\n', file);
944 lpc = '\n';
945 }
946 fputs(s, file);
947
948
949 if (s[0])
950 lpc = 'x';
951
952
953 if (last_gets_char == LAST_IS_NUL) {
954 fputc('\0', file);
955 lpc = 'x';
956 } else
957
958 if (last_gets_char != NO_EOL_CHAR) {
959 fputc(last_gets_char, file);
960 lpc = last_gets_char;
961 }
962
963 if (ferror(file)) {
964 xfunc_error_retval = 4;
965 bb_error_msg_and_die(bb_msg_write_error);
966 }
967 *last_puts_char = lpc;
968}
969
970static void flush_append(char *last_puts_char)
971{
972 char *data;
973
974
975 while ((data = (char *)llist_pop(&G.append_head)) != NULL) {
976
977
978
979
980
981
982
983
984
985 puts_maybe_newline(data, G.nonstdout, last_puts_char, '\n');
986 free(data);
987 }
988}
989
990
991
992
993static char *get_next_line(char *gets_char, char *last_puts_char)
994{
995 char *temp = NULL;
996 size_t len;
997 char gc;
998
999 flush_append(last_puts_char);
1000
1001
1002
1003 gc = NO_EOL_CHAR;
1004 for (; G.current_input_file <= G.last_input_file; G.current_input_file++) {
1005 FILE *fp = G.current_fp;
1006 if (!fp) {
1007 const char *path = G.input_file_list[G.current_input_file];
1008 fp = stdin;
1009 if (path != bb_msg_standard_input) {
1010 fp = fopen_or_warn(path, "r");
1011 if (!fp) {
1012 G.exitcode = EXIT_FAILURE;
1013 continue;
1014 }
1015 }
1016 G.current_fp = fp;
1017 }
1018
1019
1020
1021 temp = bb_get_chunk_from_file(fp, &len);
1022 if (temp) {
1023
1024 char c = temp[len-1];
1025 if (c == '\n' || c == '\0') {
1026 temp[len-1] = '\0';
1027 gc = c;
1028 if (c == '\0') {
1029 int ch = fgetc(fp);
1030 if (ch != EOF)
1031 ungetc(ch, fp);
1032 else
1033 gc = LAST_IS_NUL;
1034 }
1035 }
1036
1037 break;
1038
1039
1040
1041
1042
1043
1044
1045
1046 }
1047
1048 fclose_if_not_stdin(fp);
1049 G.current_fp = NULL;
1050 }
1051 *gets_char = gc;
1052 return temp;
1053}
1054
/* Write s, followed by terminator n, to the current output stream.
 * The expansion refers to a variable named last_puts_char, which must
 * exist in the scope where the macro is used. */
#define sed_puts(s, n) (puts_maybe_newline(s, G.nonstdout, &last_puts_char, n))
1056
1057static int beg_match(sed_cmd_t *sed_cmd, const char *pattern_space)
1058{
1059 int retval = sed_cmd->beg_match && !regexec(sed_cmd->beg_match, pattern_space, 0, NULL, 0);
1060 if (retval)
1061 G.previous_regex_ptr = sed_cmd->beg_match;
1062 return retval;
1063}
1064
1065
1066
/* Main execution loop: read the input line by line (across all files
 * that get_next_line() walks through) and run the command list on each
 * line. Returns when no input is left.
 *
 * One line is always read ahead (next_line), so that "is this the last
 * line?" can be answered by testing next_line for NULL.
 */
static void process_files(void)
{
	char *pattern_space, *next_line;
	int linenum = 0;
	char last_puts_char = '\n';
	char last_gets_char, next_gets_char;
	sed_cmd_t *sed_cmd;
	int substituted;

	/* Fetch the first line up front */
	next_line = get_next_line(&next_gets_char, &last_puts_char);

	/* Start of a cycle: one iteration per input line */
 again:
	substituted = 0;

	/* The read-ahead line becomes the pattern space; stop at end of input */
	pattern_space = next_line;
	if (!pattern_space)
		return;
	last_gets_char = next_gets_char;

	/* Read ahead again so that the '$' address can be recognised */
	next_line = get_next_line(&next_gets_char, &last_puts_char);
	linenum++;

	/* Run every command on the pattern space ('D' re-enters here) */
 restart:
	for (sed_cmd = G.sed_cmd_head; sed_cmd; sed_cmd = sed_cmd->next) {
		int old_matched, matched;

		/* Range state before this line, needed for the end-regex test */
		old_matched = sed_cmd->in_match;

		/* Debug output of the individual terms of the address test below */

		dbg("match1:%d", sed_cmd->in_match);
		dbg("match2:%d", (!sed_cmd->beg_line && !sed_cmd->end_line
				&& !sed_cmd->beg_match && !sed_cmd->end_match));
		dbg("match3:%d", (sed_cmd->beg_line > 0
			&& (sed_cmd->end_line || sed_cmd->end_match
			    ? (sed_cmd->beg_line <= linenum)
			    : (sed_cmd->beg_line == linenum)
			    )
			));
		dbg("match4:%d", (beg_match(sed_cmd, pattern_space)));
		dbg("match5:%d", (sed_cmd->beg_line == -1 && next_line == NULL));

		/* The command applies if we are already inside its range... */
		sed_cmd->in_match = sed_cmd->in_match
			/* ...or it has no address at all... */
			|| (!sed_cmd->beg_line && !sed_cmd->end_line
				&& !sed_cmd->beg_match && !sed_cmd->end_match)
			/* ...or its numeric first address has been reached
			 * (for a range "N,end" any line >= N starts it; the
			 * end of the range is not examined here)... */
			|| (sed_cmd->beg_line > 0
			    && (sed_cmd->end_line || sed_cmd->end_match
				? (sed_cmd->beg_line <= linenum)
				: (sed_cmd->beg_line == linenum)
				)
			    )
			/* ...or its first-address regex matches this line... */
			|| (beg_match(sed_cmd, pattern_space))
			/* ...or its first address is '$' and this is the last line */
			|| (sed_cmd->beg_line == -1 && next_line == NULL);

		/* Keep the result: in_match is recomputed below for the next line */
		matched = sed_cmd->in_match;

		dbg("cmd:'%c' matched:%d beg_line:%d end_line:%d linenum:%d",
			sed_cmd->cmd, matched, sed_cmd->beg_line, sed_cmd->end_line, linenum);

		/* Decide whether the range ends with this line */

		if (matched) {
			if (sed_cmd->end_line <= -2) {
				/* ",+N" second address: turn it into an absolute line number */
				sed_cmd->end_line = linenum + (-sed_cmd->end_line - 2);
			}
			/* A numeric first address fires only once: disable it */
			if (sed_cmd->beg_line > 0) {
				sed_cmd->beg_line = -2;
			}
			dbg("end1:%d", sed_cmd->end_line ? sed_cmd->end_line == -1
						? !next_line : (sed_cmd->end_line <= linenum)
					: !sed_cmd->end_match);
			dbg("end2:%d", sed_cmd->end_match && old_matched
					&& !regexec(sed_cmd->end_match,pattern_space, 0, NULL, 0));
			sed_cmd->in_match = !(
				/* the numeric end has been reached ('$' means last
				 * line), or there is no second address at all */
				(sed_cmd->end_line
					? sed_cmd->end_line == -1
						? !next_line
						: (sed_cmd->end_line <= linenum)
					: !sed_cmd->end_match
				)
				/* or the end regex matches; it is only tried on lines
				 * after the one that opened the range */
				|| (sed_cmd->end_match && old_matched
				     && (regexec(sed_cmd->end_match,
						pattern_space, 0, NULL, 0) == 0)
				)
			);
		}

		/* '{': when the block does not apply, skip ahead to the
		 * matching '}' (nested blocks are counted) */
		if (sed_cmd->cmd == '{') {
			if (sed_cmd->invert ? matched : !matched) {
				unsigned nest_cnt = 0;
				while (1) {
					if (sed_cmd->cmd == '{')
						nest_cnt++;
					if (sed_cmd->cmd == '}') {
						nest_cnt--;
						if (nest_cnt == 0)
							break;
					}
					sed_cmd = sed_cmd->next;
					if (!sed_cmd)
						bb_error_msg_and_die("unterminated {");
				}
			}
			continue;
		}

		/* Skip the command if the (possibly inverted) address test failed */
		if (sed_cmd->invert ? matched : !matched)
			continue;

		/* An executed command's address regex becomes the "previous regex" */
		if (sed_cmd->beg_match) {
			G.previous_regex_ptr = sed_cmd->beg_match;
		}

		/* Execute the command */
		dbg("pattern_space:'%s' next_line:'%s' cmd:%c",
				pattern_space, next_line, sed_cmd->cmd);
		switch (sed_cmd->cmd) {

		/* Print the current line number */
		case '=':
			fprintf(G.nonstdout, "%d\n", linenum);
			break;

		/* Print the pattern space up to the first newline */
		case 'P':
		{
			char *tmp = strchr(pattern_space, '\n');
			if (tmp) {
				*tmp = '\0';
				/* Temporarily cut the string at the newline */
				sed_puts(pattern_space, '\n');
				*tmp = '\n';
				break;
			}
			/* No newline: behaves like 'p' (fall through) */
		}

		/* Print the whole pattern space, always newline-terminated */
		case 'p':
			sed_puts(pattern_space, '\n');
			break;
		/* Delete up to and including the first newline, then rerun
		 * the script on what is left without reading new input */
		case 'D':
		{
			char *tmp = strchr(pattern_space, '\n');
			if (tmp) {
				overlapping_strcpy(pattern_space, tmp + 1);
				goto restart;
			}
		}
		/* No newline: behaves like 'd' (fall through) */
		/* Delete the pattern space and start the next cycle */
		case 'd':
			goto discard_line;

		/* Substitute */
		case 's':
			if (!do_subst_command(sed_cmd, &pattern_space))
				break;
			dbg("do_subst_command succeeded:'%s'", pattern_space);
			substituted |= 1;

			/* 'p' flag: print the result */
			if (sed_cmd->sub_p)
				sed_puts(pattern_space, last_gets_char);
			/* 'w' flag: also write the result to the file */
			if (sed_cmd->sw_file)
				puts_maybe_newline(
					pattern_space, sed_cmd->sw_file,
					&sed_cmd->sw_last_char, last_gets_char);
			break;

		/* Queue text to be written after the current line */
		case 'a':
			append(xstrdup(sed_cmd->string));
			break;

		/* Write text before the current line */
		case 'i':
			sed_puts(sed_cmd->string, '\n');
			break;

		/* Replace the line(s) with text */
		case 'c':
			/* For a range the text is written only once, when the
			 * range ends; every line of the range is discarded */
			if (!sed_cmd->in_match)
				sed_puts(sed_cmd->string, '\n');
			goto discard_line;

		/* Queue the lines of a file to be written after the current line */
		case 'r':
		{
			FILE *rfile;
			rfile = fopen_for_read(sed_cmd->string);
			if (rfile) {
				char *line;
				while ((line = xmalloc_fgetline(rfile))
						!= NULL)
					append(line);
				fclose(rfile);
			}
			/* An unreadable file is silently ignored */

			break;
		}

		/* Write the pattern space to the command's file */
		case 'w':
			puts_maybe_newline(
				pattern_space, sed_cmd->sw_file,
				&sed_cmd->sw_last_char, last_gets_char);
			break;

		/* Print (unless quiet) and replace the pattern space with the next line */
		case 'n':
			if (!G.be_quiet)
				sed_puts(pattern_space, last_gets_char);
			if (next_line == NULL) {
				/* No next line: the cycle ends here; the pattern
				 * space has already been printed above */
				goto discard_line;
			}
			free(pattern_space);
			pattern_space = next_line;
			last_gets_char = next_gets_char;
			next_line = get_next_line(&next_gets_char, &last_puts_char);
			substituted = 0;
			linenum++;
			break;

		/* Quit: drop the read-ahead line so that the loop ends after
		 * the end-of-cycle output of the current line */
		case 'q':
			free(next_line);
			next_line = NULL;
			goto discard_commands;

		/* Append a newline and the next input line to the pattern space */
		case 'N':
		{
			int len;
			/* No next line: finish the cycle (the pattern space is
			 * still printed unless in quiet mode) */
			if (next_line == NULL) {
				goto discard_commands;
			}
			/* Room for both strings, the joining newline and the NUL */
			len = strlen(pattern_space);
			pattern_space = xrealloc(pattern_space, len + strlen(next_line) + 2);
			pattern_space[len] = '\n';
			strcpy(pattern_space + len+1, next_line);
			last_gets_char = next_gets_char;
			next_line = get_next_line(&next_gets_char, &last_puts_char);
			linenum++;
			break;
		}

		/* Branch if a substitution succeeded since the last input
		 * line or 't'; the flag is cleared when the branch is taken */
		case 't':
			if (!substituted) break;
			substituted = 0;
			/* fall through (substituted is now 0, so 'T' below does not break) */
		/* Branch if no substitution succeeded */
		case 'T':
			if (substituted) break;
			/* fall through */
		/* Branch to a label; without a label, to the end of the script */
		case 'b':
			if (!sed_cmd->string) goto discard_commands;
			else sed_cmd = branch_to(sed_cmd->string);
			break;
		/* Transliterate: string holds (from, to) character pairs */
		case 'y':
		{
			int i, j;
			for (i = 0; pattern_space[i]; i++) {
				for (j = 0; sed_cmd->string[j]; j += 2) {
					if (pattern_space[i] == sed_cmd->string[j]) {
						pattern_space[i] = sed_cmd->string[j + 1];
						break;
					}
				}
			}

			break;
		}
		/* Replace the pattern space with a copy of the hold space */
		case 'g':
			free(pattern_space);
			pattern_space = xstrdup(G.hold_space ? G.hold_space : "");
			break;
		/* Append a newline and the hold space to the pattern space */
		case 'G':
		{
			/* 2 = joining newline + terminating NUL */
			int pattern_space_size = 2;
			int hold_space_size = 0;

			if (pattern_space)
				pattern_space_size += strlen(pattern_space);
			if (G.hold_space)
				hold_space_size = strlen(G.hold_space);
			pattern_space = xrealloc(pattern_space,
					pattern_space_size + hold_space_size);
			/* Size still 2: the pattern space was NULL or empty, so
			 * make sure strcat() below sees an empty string */
			if (pattern_space_size == 2)
				pattern_space[0] = 0;
			strcat(pattern_space, "\n");
			if (G.hold_space)
				strcat(pattern_space, G.hold_space);
			last_gets_char = '\n';

			break;
		}
		/* Replace the hold space with a copy of the pattern space */
		case 'h':
			free(G.hold_space);
			G.hold_space = xstrdup(pattern_space);
			break;
		/* Append a newline and the pattern space to the hold space */
		case 'H':
		{
			/* 2 = joining newline + terminating NUL */
			int hold_space_size = 2;
			int pattern_space_size = 0;

			if (G.hold_space)
				hold_space_size += strlen(G.hold_space);
			if (pattern_space)
				pattern_space_size = strlen(pattern_space);
			G.hold_space = xrealloc(G.hold_space,
					hold_space_size + pattern_space_size);

			/* Size still 2: the hold space was NULL or empty */
			if (hold_space_size == 2)
				*G.hold_space = 0;
			strcat(G.hold_space, "\n");
			if (pattern_space)
				strcat(G.hold_space, pattern_space);

			break;
		}
		/* Exchange hold space and pattern space; an unused hold
		 * space counts as an empty string */
		case 'x':
		{
			char *tmp = pattern_space;
			pattern_space = G.hold_space ? G.hold_space : xzalloc(1);
			last_gets_char = '\n';
			G.hold_space = tmp;
			break;
		}
		} /* switch */
	} /* for each cmd */

	/* End of the script (also reached by jumps that end the cycle
	 * early but still want the pattern space printed) */
 discard_commands:
	/* Auto-print the pattern space unless in quiet mode */
	if (!G.be_quiet)
		sed_puts(pattern_space, last_gets_char);

	/* End of cycle without auto-print: write queued append text, free
	 * the pattern space and go on to the next line */
 discard_line:
	flush_append(&last_puts_char );
	free(pattern_space);

	goto again;
}
1464
1465
1466
1467
1468
1469
1470
/* Feed a script that may consist of several newline-separated lines to
 * add_cmd(), one line at a time. cmdstr itself is not modified: the
 * splitting is done on a private copy. */
static void add_cmd_block(char *cmdstr)
{
	char *sv, *eol;

	cmdstr = sv = xstrdup(cmdstr);
	for (;;) {
		eol = strchr(cmdstr, '\n');
		if (eol)
			*eol = '\0';
		add_cmd(cmdstr);
		/* Stop after the last line; "eol + 1" must not be computed
		 * when eol is NULL (arithmetic on a null pointer is undefined) */
		if (!eol)
			break;
		cmdstr = eol + 1;
	}
	free(sv);
}
1485
1486int sed_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1487int sed_main(int argc UNUSED_PARAM, char **argv)
1488{
1489 unsigned opt;
1490 llist_t *opt_e, *opt_f;
1491 char *opt_i;
1492
1493#if ENABLE_LONG_OPTS
1494 static const char sed_longopts[] ALIGN1 =
1495
1496 "in-place\0" Optional_argument "i"
1497 "regexp-extended\0" No_argument "r"
1498 "quiet\0" No_argument "n"
1499 "silent\0" No_argument "n"
1500 "expression\0" Required_argument "e"
1501 "file\0" Required_argument "f";
1502#endif
1503
1504 INIT_G();
1505
1506
1507 if (ENABLE_FEATURE_CLEAN_UP) atexit(sed_free_and_close_stuff);
1508
1509
1510 if (argv[1] && strcmp(argv[1], "--version") == 0) {
1511 puts("This is not GNU sed version 4.0");
1512 return 0;
1513 }
1514
1515
1516 opt_e = opt_f = NULL;
1517 opt_i = NULL;
1518
1519
1520
1521
1522
1523 opt = getopt32long(argv, "^"
1524 "i::rEne:*f:*"
1525 "\0" "nn",
1526 sed_longopts,
1527 &opt_i, &opt_e, &opt_f,
1528 &G.be_quiet);
1529
1530 argv += optind;
1531 if (opt & OPT_in_place) {
1532 die_func = cleanup_outname;
1533 }
1534 if (opt & (2|4))
1535 G.regex_type |= REG_EXTENDED;
1536
1537
1538 while (opt_e) {
1539 add_cmd_block(llist_pop(&opt_e));
1540 }
1541 while (opt_f) {
1542 char *line;
1543 FILE *cmdfile;
1544 cmdfile = xfopen_stdin(llist_pop(&opt_f));
1545 while ((line = xmalloc_fgetline(cmdfile)) != NULL) {
1546 add_cmd(line);
1547 free(line);
1548 }
1549 fclose_if_not_stdin(cmdfile);
1550 }
1551
1552 if (!(opt & 0x30)) {
1553 if (!*argv)
1554 bb_show_usage();
1555 add_cmd_block(*argv++);
1556 }
1557
1558 add_cmd("");
1559
1560
1561 G.nonstdout = stdout;
1562
1563
1564
1565
1566 G.input_file_list = argv;
1567 if (!argv[0]) {
1568 if (opt & OPT_in_place)
1569 bb_error_msg_and_die(bb_msg_requires_arg, "-i");
1570 argv[0] = (char*)bb_msg_standard_input;
1571
1572 } else {
1573 goto start;
1574
1575 for (; *argv; argv++) {
1576 struct stat statbuf;
1577 int nonstdoutfd;
1578 sed_cmd_t *sed_cmd;
1579
1580 G.last_input_file++;
1581 start:
1582 if (!(opt & OPT_in_place)) {
1583 if (LONE_DASH(*argv)) {
1584 *argv = (char*)bb_msg_standard_input;
1585 process_files();
1586 }
1587 continue;
1588 }
1589
1590
1591
1592 if (stat(*argv, &statbuf) != 0) {
1593 bb_simple_perror_msg(*argv);
1594 G.exitcode = EXIT_FAILURE;
1595 G.current_input_file++;
1596 continue;
1597 }
1598 G.outname = xasprintf("%sXXXXXX", *argv);
1599 nonstdoutfd = xmkstemp(G.outname);
1600 G.nonstdout = xfdopen_for_write(nonstdoutfd);
1601
1602
1603
1604 fchmod(nonstdoutfd, statbuf.st_mode);
1605 fchown(nonstdoutfd, statbuf.st_uid, statbuf.st_gid);
1606
1607 process_files();
1608 fclose(G.nonstdout);
1609 G.nonstdout = stdout;
1610
1611 if (opt_i) {
1612 char *backupname = xasprintf("%s%s", *argv, opt_i);
1613 xrename(*argv, backupname);
1614 free(backupname);
1615 }
1616
1617 xrename(G.outname, *argv);
1618 free(G.outname);
1619 G.outname = NULL;
1620
1621
1622 for (sed_cmd = G.sed_cmd_head; sed_cmd; sed_cmd = sed_cmd->next) {
1623 sed_cmd->beg_line = sed_cmd->beg_line_orig;
1624 sed_cmd->end_line = sed_cmd->end_line_orig;
1625 }
1626 }
1627
1628
1629
1630
1631
1632 }
1633
1634 process_files();
1635
1636 return G.exitcode;
1637}
1638