1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85#include "libbb.h"
86#include "common_bufsiz.h"
87#include "xregex.h"
88
/*
 * Debug tracing hook.
 * Flip the "#if 0" to "#if 1" to route every dbg() call to bb_error_msg();
 * otherwise dbg() expands to a no-op expression and its arguments are
 * never evaluated.
 */
#if 0
# define dbg(...) bb_error_msg(__VA_ARGS__)
#else
# define dbg(...) ((void)0)
#endif
94
95
/* Option bits tested on the value returned by getopt32long() in sed_main(). */
enum {
	OPT_in_place = 1 << 0, /* -i: "i" is the first letter of the option string */
};
99
100
/* One parsed sed command. Commands form a singly linked list in script order. */
typedef struct sed_cmd_s {
	/* Next command in the script, NULL for the last one */
	struct sed_cmd_s *next;

	/* Address matching */
	regex_t *beg_match;     /* compiled regex of the first address, or NULL */
	regex_t *end_match;     /* compiled regex of the second address, or NULL */
	regex_t *sub_match;     /* 's' command regex; NULL means "reuse previous regex" */
	int beg_line;           /* first address line: 0 none, -1 is '$', -2 once a numeric start has matched */
	int beg_line_orig;      /* beg_line as parsed, used to restore it */
	int end_line;           /* second address line: 0 none, -1 is '$', <= -2 encodes ",+N" as (-2 - N) */
	int end_line_orig;      /* end_line as parsed, used to restore it */

	FILE *sw_file;          /* output stream of the 'w' command or the s///w flag */
	char *string;           /* command argument: replacement, text, file name, label or 'y' pairs */

	unsigned which_match;   /* 's': which occurrence to replace; 0 means every occurrence */

	/* Bit flags */
	unsigned invert:1;      /* '!' was given: run the command on non-matching lines */
	unsigned in_match:1;    /* currently inside a matched address range */
	unsigned sub_p:1;       /* s///p flag: print pattern space after a substitution */

	char sw_last_char;      /* last character written to sw_file */

	/* The command letter itself */
	char cmd;
} sed_cmd_t;
129
130static const char semicolon_whitespace[] ALIGN1 = "; \n\r\t\v";
131
132struct globals {
133
134 int be_quiet, regex_type;
135
136 FILE *nonstdout;
137 char *outname, *hold_space;
138 smallint exitcode;
139
140
141 int current_input_file, last_input_file;
142 char **input_file_list;
143 FILE *current_fp;
144
145 regmatch_t regmatch[10];
146 regex_t *previous_regex_ptr;
147
148
149 sed_cmd_t *sed_cmd_head, **sed_cmd_tail;
150
151
152 llist_t *append_head;
153
154 char *add_cmd_line;
155
156 struct pipeline {
157 char *buf;
158 int idx;
159 int len;
160 } pipeline;
161} FIX_ALIASING;
162#define G (*(struct globals*)bb_common_bufsiz1)
163#define INIT_G() do { \
164 setup_common_bufsiz(); \
165 BUILD_BUG_ON(sizeof(G) > COMMON_BUFSIZE); \
166 G.sed_cmd_tail = &G.sed_cmd_head; \
167} while (0)
168
169
#if ENABLE_FEATURE_CLEAN_UP
/*
 * Exit-time cleanup (registered with atexit() in sed_main): releases the
 * pending append list, every parsed command, the hold space and the input
 * stream that is still open.
 *
 * Compiled regexes are deliberately not released here: get_address() can
 * hand the same regex_t (G.previous_regex_ptr) to more than one command,
 * so freeing per command could free the same object twice.
 */
static void sed_free_and_close_stuff(void)
{
	sed_cmd_t *cur;
	sed_cmd_t *following;

	llist_free(G.append_head, free);

	for (cur = G.sed_cmd_head; cur != NULL; cur = following) {
		/* Remember the successor before the node is released */
		following = cur->next;

		if (cur->sw_file != NULL)
			fclose(cur->sw_file);

		free(cur->string);
		free(cur);
	}

	free(G.hold_space);

	if (G.current_fp != NULL)
		fclose(G.current_fp);
}
#else
void sed_free_and_close_stuff(void);
#endif
213
214
215
216static void cleanup_outname(void)
217{
218 if (G.outname) unlink(G.outname);
219}
220
221
222
/*
 * Copy STRING to DEST while rewriting backslash sequences.
 *
 *  - to != 0: the sequence "\<from>" becomes the single character TO;
 *    any other "\x" is copied through unchanged (backslash included).
 *  - to == 0: every "\x" becomes plain "x" (the backslash is dropped).
 *
 * LEN is the number of input characters to consider, or -1 to use
 * strlen(string).  Copying also stops at the first NUL encountered.
 * DEST may be the same buffer as STRING: the output never gets ahead of
 * the input.  DEST is always NUL-terminated.
 *
 * Returns the length of the result, not counting the terminating NUL.
 */
static unsigned parse_escapes(char *dest, const char *string, int len, char from, char to)
{
	unsigned out = 0;
	int in = 0;

	if (len == -1)
		len = strlen(string);

	while (in < len) {
		char c = string[in];

		if (c == '\\') {
			char next = string[in + 1];

			if (to == '\0' || next == from) {
				/* Replace the two-character sequence with one character */
				c = (to != '\0') ? to : next;
				dest[out] = c;
				if (c == '\0')
					return out;
				out++;
				in += 2;
				continue;
			}
			/* Not our escape: keep the backslash and copy what follows verbatim */
			dest[out++] = '\\';
			in++;
			c = next;
		}

		dest[out] = c;
		in++;
		if (c == '\0')
			return out;
		out++;
	}

	dest[out] = '\0';
	return out;
}
251
/*
 * Return a newly allocated copy of the first LEN characters of STRING in
 * which the sequences "\n", "\t" and "\r" have been replaced by the
 * control characters they name.  The caller owns (and frees) the result.
 */
static char *copy_parsing_escapes(const char *string, int len)
{
	/* { replacement character, escape letter } */
	static const char escape_pairs[][2] = {
		{ '\n', 'n' },
		{ '\t', 't' },
		{ '\r', 'r' },
	};
	char *dest = xmalloc(len + 1);
	const char *src = string;
	unsigned k;

	/* The first pass copies into dest; later passes rewrite dest in place */
	for (k = 0; k < sizeof(escape_pairs) / sizeof(escape_pairs[0]); k++) {
		len = parse_escapes(dest, src, len, escape_pairs[k][1], escape_pairs[k][0]);
		src = dest;
	}
	return dest;
}
265
266
267
268
269
270
271
272
/*
 * Return the index of the first occurrence of DELIMITER in STR that is
 * neither preceded by a backslash nor located inside a bracket
 * expression ("[...]").  Dies with "unmatched '<delimiter>'" when the
 * end of STR is reached first.
 *
 * A negative DELIMITER means "search for -DELIMITER, and do not treat
 * '[' specially" (used for the replacement part of s/// and y///).
 *
 * Characters are examined as unsigned char so that a delimiter byte
 * >= 0x80, which callers pass as a positive int, still compares equal
 * on platforms where plain char is signed.
 */
static int index_of_next_unescaped_regexp_delim(int delimiter, const char *str)
{
	int bracket = -1;       /* index of the opening '[', -1 outside, -2 brackets disabled */
	int escaped = 0;        /* previous character was an unescaped backslash */
	int idx = 0;
	unsigned char ch;

	if (delimiter < 0) {
		bracket--;
		delimiter = -delimiter;
	}

	for (; (ch = str[idx]) != '\0'; idx++) {
		if (bracket >= 0) {
			/* A ']' directly after "[" or "[^" is a literal, not the closer */
			if (ch == ']'
			 && !(bracket == idx - 1 || (bracket == idx - 2 && str[idx - 1] == '^'))
			) {
				bracket = -1;
			}
		} else if (escaped)
			escaped = 0;
		else if (ch == '\\')
			escaped = 1;
		else if (bracket == -1 && ch == '[')
			bracket = idx;
		else if (ch == delimiter)
			return idx;
	}

	/* Hit the end of the string without finding the delimiter */
	bb_error_msg_and_die("unmatched '%c'", delimiter);
}
305
306
307
308
/*
 * Split "<d>match<d>replace<d>..." (the text following an 's' or 'y'
 * command letter) into its two parts.  The first character of CMDSTR is
 * the delimiter.  *MATCH and *REPLACE receive newly allocated strings
 * with \n, \t and \r already converted.
 *
 * Returns the offset within CMDSTR of the final (third) delimiter.
 * Dies if CMDSTR is empty or a delimiter is missing.
 */
static int parse_regex_delim(const char *cmdstr, char **match, char **replace)
{
	const char *part;
	unsigned char delim;
	int part_len;

	if (cmdstr[0] == '\0')
		bb_simple_error_msg_and_die("bad format in substitution expression");
	delim = cmdstr[0];
	part = cmdstr + 1;

	/* First part: the pattern, where bracket expressions may hide the delimiter */
	part_len = index_of_next_unescaped_regexp_delim(delim, part);
	*match = copy_parsing_escapes(part, part_len);

	/* Second part: the replacement; a negative delimiter disables bracket handling */
	part += part_len + 1;
	part_len = index_of_next_unescaped_regexp_delim(-(int)delim, part);
	*replace = copy_parsing_escapes(part, part_len);

	return (part - cmdstr) + part_len;
}
332
333
334
335
336static int get_address(const char *my_str, int *linenum, regex_t ** regex)
337{
338 const char *pos = my_str;
339
340 if (isdigit(*my_str)) {
341 *linenum = strtol(my_str, (char**)&pos, 10);
342
343 } else if (*my_str == '$') {
344 *linenum = -1;
345 pos++;
346 } else if (*my_str == '/' || *my_str == '\\') {
347 int next;
348 char delimiter;
349 char *temp;
350
351 delimiter = '/';
352 if (*my_str == '\\')
353 delimiter = *++pos;
354 next = index_of_next_unescaped_regexp_delim(delimiter, ++pos);
355 if (next != 0) {
356 temp = copy_parsing_escapes(pos, next);
357 G.previous_regex_ptr = *regex = xzalloc(sizeof(regex_t));
358 xregcomp(*regex, temp, G.regex_type);
359 free(temp);
360 } else {
361 *regex = G.previous_regex_ptr;
362 if (!G.previous_regex_ptr)
363 bb_simple_error_msg_and_die("no previous regexp");
364 }
365
366 pos += (next+1);
367 }
368 return pos - my_str;
369}
370
371
/*
 * Extract the file name argument of 'r', 'w' or the s///w flag: leading
 * whitespace is skipped and the name runs to the end of the line.
 * *RETVAL receives a newly allocated copy.  Dies on an empty name.
 *
 * A newline inside FILECMDSTR is where add_cmd() glued a continuation
 * line after dropping its trailing backslash, so in that case the
 * backslash is put back at the end of the name.
 *
 * Returns the offset within FILECMDSTR of the end of the name (the
 * newline or the terminating NUL).
 */
static int parse_file_cmd( const char *filecmdstr, char **retval)
{
	const char *name = skip_whitespace(filecmdstr);
	const char *name_end = strchrnul(name, '\n');
	int name_len = name_end - name;

	if (name_len == 0)
		bb_simple_error_msg_and_die("empty filename");

	if (*name_end == '\0') {
		*retval = xstrdup(name);
	} else {
		/* Copy one character more than the name and overwrite it */
		char *copy = xstrndup(name, name_len + 1);
		copy[name_len] = '\\';
		*retval = copy;
	}

	return name_end - filecmdstr;
}
394
395static int parse_subst_cmd(sed_cmd_t *sed_cmd, const char *substr)
396{
397 int cflags = G.regex_type;
398 char *match;
399 int idx;
400
401
402
403
404
405
406
407 idx = parse_regex_delim(substr, &match, &sed_cmd->string);
408
409
410
411
412
413
414
415
416 sed_cmd->which_match = 1;
417 dbg("s flags:'%s'", substr + idx + 1);
418 while (substr[++idx]) {
419 dbg("s flag:'%c'", substr[idx]);
420
421 if (isdigit(substr[idx])) {
422 if (match[0] != '^') {
423
424 const char *pos = substr + idx;
425
426 sed_cmd->which_match = (unsigned)strtol(substr+idx, (char**) &pos, 10);
427 idx = pos - substr - 1;
428 }
429 continue;
430 }
431
432 if (isspace(substr[idx]))
433 continue;
434
435 switch (substr[idx]) {
436
437 case 'g':
438 sed_cmd->which_match = 0;
439 break;
440
441 case 'p':
442 sed_cmd->sub_p = 1;
443 break;
444
445 case 'w':
446 {
447 char *fname;
448 idx += parse_file_cmd( substr+idx+1, &fname);
449 sed_cmd->sw_file = xfopen_for_write(fname);
450 sed_cmd->sw_last_char = '\n';
451 free(fname);
452 break;
453 }
454
455 case 'i':
456 case 'I':
457 cflags |= REG_ICASE;
458 break;
459
460 case '#':
461
462 idx += strlen(substr + idx);
463
464
465 case ';':
466 case '}':
467 goto out;
468 default:
469 dbg("s bad flags:'%s'", substr + idx);
470 bb_simple_error_msg_and_die("bad option in substitution expression");
471 }
472 }
473 out:
474
475 if (*match != '\0') {
476
477 sed_cmd->sub_match = xzalloc(sizeof(regex_t));
478 dbg("xregcomp('%s',%x)", match, cflags);
479 xregcomp(sed_cmd->sub_match, match, cflags);
480 dbg("regcomp ok");
481 }
482 free(match);
483
484 return idx;
485}
486
487
488
489
490static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
491{
492 static const char cmd_letters[] ALIGN1 = "saicrw:btTydDgGhHlnNpPqx={}";
493 enum {
494 IDX_s = 0,
495 IDX_a,
496 IDX_i,
497 IDX_c,
498 IDX_r,
499 IDX_w,
500 IDX_colon,
501 IDX_b,
502 IDX_t,
503 IDX_T,
504 IDX_y,
505 IDX_d,
506 IDX_D,
507 IDX_g,
508 IDX_G,
509 IDX_h,
510 IDX_H,
511 IDX_l,
512 IDX_n,
513 IDX_N,
514 IDX_p,
515 IDX_P,
516 IDX_q,
517 IDX_x,
518 IDX_equal,
519 IDX_lbrace,
520 IDX_rbrace,
521 IDX_nul
522 };
523 unsigned idx;
524
525 BUILD_BUG_ON(sizeof(cmd_letters)-1 != IDX_nul);
526
527 idx = strchrnul(cmd_letters, sed_cmd->cmd) - cmd_letters;
528
529
530 if (idx == IDX_s) {
531 cmdstr += parse_subst_cmd(sed_cmd, cmdstr);
532 }
533
534 else if (idx <= IDX_c) {
535 unsigned len;
536
537 if (idx < IDX_c) {
538 if (sed_cmd->end_line || sed_cmd->end_match)
539 bb_error_msg_and_die("command '%c' uses only one address", sed_cmd->cmd);
540 }
541 for (;;) {
542 if (*cmdstr == '\n' || *cmdstr == '\\') {
543 cmdstr++;
544 break;
545 }
546 if (!isspace(*cmdstr))
547 break;
548 cmdstr++;
549 }
550 len = strlen(cmdstr);
551 sed_cmd->string = copy_parsing_escapes(cmdstr, len);
552 cmdstr += len;
553
554 parse_escapes(sed_cmd->string, sed_cmd->string, -1, '\0', '\0');
555 }
556
557 else if (idx <= IDX_w) {
558 if (idx < IDX_w) {
559 if (sed_cmd->end_line || sed_cmd->end_match)
560 bb_error_msg_and_die("command '%c' uses only one address", sed_cmd->cmd);
561 }
562 cmdstr += parse_file_cmd( cmdstr, &sed_cmd->string);
563 if (sed_cmd->cmd == 'w') {
564 sed_cmd->sw_file = xfopen_for_write(sed_cmd->string);
565 sed_cmd->sw_last_char = '\n';
566 }
567 }
568
569 else if (idx <= IDX_T) {
570 int length;
571
572 cmdstr = skip_whitespace(cmdstr);
573 length = strcspn(cmdstr, semicolon_whitespace);
574 if (length) {
575 sed_cmd->string = xstrndup(cmdstr, length);
576 cmdstr += length;
577 }
578 }
579
580 else if (idx == IDX_y) {
581 char *match, *replace;
582 int i = cmdstr[0];
583
584 cmdstr += parse_regex_delim(cmdstr, &match, &replace)+1;
585
586 parse_escapes(match, match, -1, i, i);
587 parse_escapes(replace, replace, -1, i, i);
588
589 sed_cmd->string = xzalloc((strlen(match) + 1) * 2);
590 for (i = 0; match[i] && replace[i]; i++) {
591 sed_cmd->string[i*2] = match[i];
592 sed_cmd->string[i*2+1] = replace[i];
593 }
594 free(match);
595 free(replace);
596 }
597
598
599
600 else if (idx >= IDX_nul) {
601 bb_error_msg_and_die("unsupported command %c", sed_cmd->cmd);
602 }
603
604
605 return cmdstr;
606}
607
608
609
610
611static void add_cmd(const char *cmdstr)
612{
613 sed_cmd_t *sed_cmd;
614 unsigned len, n;
615
616
617 if (G.add_cmd_line) {
618 char *tp = xasprintf("%s\n%s", G.add_cmd_line, cmdstr);
619 free(G.add_cmd_line);
620 cmdstr = G.add_cmd_line = tp;
621 }
622
623
624 n = len = strlen(cmdstr);
625 while (n && cmdstr[n-1] == '\\')
626 n--;
627 if ((len - n) & 1) {
628 if (!G.add_cmd_line)
629 G.add_cmd_line = xstrdup(cmdstr);
630 G.add_cmd_line[len-1] = '\0';
631 return;
632 }
633
634
635 while (*cmdstr) {
636
637 cmdstr += strspn(cmdstr, semicolon_whitespace);
638
639
640 if (!*cmdstr) break;
641
642
643 if (*cmdstr == '#') {
644
645 if (cmdstr[1] == 'n')
646 G.be_quiet++;
647 cmdstr = strpbrk(cmdstr, "\n\r");
648 if (!cmdstr) break;
649 continue;
650 }
651
652
653
654
655
656
657
658 sed_cmd = xzalloc(sizeof(sed_cmd_t));
659
660
661 cmdstr += get_address(cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match);
662 sed_cmd->beg_line_orig = sed_cmd->beg_line;
663
664
665 if (*cmdstr == ',') {
666 int idx;
667
668 cmdstr++;
669 if (*cmdstr == '+' && isdigit(cmdstr[1])) {
670
671
672
673
674
675
676
677
678
679 char *end;
680
681 idx = strtol(cmdstr+1, &end, 10);
682 sed_cmd->end_line = -2 - idx;
683 cmdstr = end;
684 } else {
685 idx = get_address(cmdstr, &sed_cmd->end_line, &sed_cmd->end_match);
686 cmdstr += idx;
687 idx--;
688 }
689 if (idx < 0)
690 bb_simple_error_msg_and_die("no address after comma");
691 sed_cmd->end_line_orig = sed_cmd->end_line;
692 }
693
694
695 cmdstr = skip_whitespace(cmdstr);
696
697
698 if (*cmdstr == '!') {
699 sed_cmd->invert = 1;
700 cmdstr++;
701
702
703 cmdstr = skip_whitespace(cmdstr);
704 }
705
706
707 if (!*cmdstr)
708 bb_simple_error_msg_and_die("missing command");
709 sed_cmd->cmd = *cmdstr++;
710 cmdstr = parse_cmd_args(sed_cmd, cmdstr);
711
712
713
714
715
716
717
718
719 *G.sed_cmd_tail = sed_cmd;
720 G.sed_cmd_tail = &sed_cmd->next;
721 }
722
723
724 free(G.add_cmd_line);
725 G.add_cmd_line = NULL;
726}
727
728
729
730#define PIPE_GROW 64
731
732static void pipe_putc(char c)
733{
734 if (G.pipeline.idx == G.pipeline.len) {
735 G.pipeline.buf = xrealloc(G.pipeline.buf,
736 G.pipeline.len + PIPE_GROW);
737 G.pipeline.len += PIPE_GROW;
738 }
739 G.pipeline.buf[G.pipeline.idx++] = c;
740}
741
742static void do_subst_w_backrefs(char *line, char *replace)
743{
744 int i, j;
745
746
747 for (i = 0; replace[i]; i++) {
748
749 if (replace[i] == '\\') {
750 unsigned backref = replace[++i] - '0';
751 if (backref <= 9) {
752
753 if (G.regmatch[backref].rm_so != -1) {
754 j = G.regmatch[backref].rm_so;
755 while (j < G.regmatch[backref].rm_eo)
756 pipe_putc(line[j++]);
757 }
758 continue;
759 }
760
761
762
763
764 pipe_putc(replace[i]);
765 continue;
766 }
767
768 if (replace[i] == '&') {
769 j = G.regmatch[0].rm_so;
770 while (j < G.regmatch[0].rm_eo)
771 pipe_putc(line[j++]);
772 continue;
773 }
774
775 pipe_putc(replace[i]);
776 }
777}
778
779static int do_subst_command(sed_cmd_t *sed_cmd, char **line_p)
780{
781 char *line = *line_p;
782 unsigned match_count = 0;
783 bool altered = 0;
784 bool prev_match_empty = 1;
785 bool tried_at_eol = 0;
786 regex_t *current_regex;
787
788 current_regex = sed_cmd->sub_match;
789
790 if (!current_regex) {
791 current_regex = G.previous_regex_ptr;
792 if (!current_regex)
793 bb_simple_error_msg_and_die("no previous regexp");
794 }
795 G.previous_regex_ptr = current_regex;
796
797
798 dbg("matching '%s'", line);
799 if (REG_NOMATCH == regexec(current_regex, line, 10, G.regmatch, 0)) {
800 dbg("no match");
801 return 0;
802 }
803 dbg("match");
804
805
806 G.pipeline.buf = xmalloc(PIPE_GROW);
807 G.pipeline.len = PIPE_GROW;
808 G.pipeline.idx = 0;
809
810
811 do {
812 int start = G.regmatch[0].rm_so;
813 int end = G.regmatch[0].rm_eo;
814 int i;
815
816 match_count++;
817
818
819
820 if (sed_cmd->which_match
821 && (sed_cmd->which_match != match_count)
822 ) {
823 for (i = 0; i < end; i++)
824 pipe_putc(*line++);
825
826 if (start == end && *line)
827 pipe_putc(*line++);
828 goto next;
829 }
830
831
832 for (i = 0; i < start; i++)
833 pipe_putc(line[i]);
834
835
836
837
838
839
840
841
842 if (prev_match_empty || start != 0 || start != end) {
843
844 dbg("inserting replacement at %d in '%s'", start, line);
845 do_subst_w_backrefs(line, sed_cmd->string);
846
847 altered = 1;
848 } else {
849 dbg("NOT inserting replacement at %d in '%s'", start, line);
850 }
851
852
853
854
855 prev_match_empty = (start == end);
856 if (prev_match_empty) {
857 if (!line[end]) {
858 tried_at_eol = 1;
859 } else {
860 pipe_putc(line[end]);
861 end++;
862 }
863 }
864
865
866 dbg("line += %d", end);
867 line += end;
868
869
870 if (sed_cmd->which_match != 0)
871 break;
872 next:
873
874 if (*line == '\0') {
875 if (tried_at_eol)
876 break;
877 tried_at_eol = 1;
878 }
879
880
881 } while (regexec(current_regex, line, 10, G.regmatch, REG_NOTBOL) != REG_NOMATCH);
882
883
884 while (1) {
885 char c = *line++;
886 pipe_putc(c);
887 if (c == '\0')
888 break;
889 }
890
891 free(*line_p);
892 *line_p = G.pipeline.buf;
893 return altered;
894}
895
896
897static sed_cmd_t *branch_to(char *label)
898{
899 sed_cmd_t *sed_cmd;
900
901 for (sed_cmd = G.sed_cmd_head; sed_cmd; sed_cmd = sed_cmd->next) {
902 if (sed_cmd->cmd == ':'
903 && sed_cmd->string
904 && strcmp(sed_cmd->string, label) == 0
905 ) {
906 return sed_cmd;
907 }
908 }
909 bb_error_msg_and_die("can't find label for jump to '%s'", label);
910}
911
912static void append(char *s)
913{
914 llist_add_to_end(&G.append_head, s);
915}
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931enum {
932 NO_EOL_CHAR = 1,
933 LAST_IS_NUL = 2,
934};
935static void puts_maybe_newline(char *s, FILE *file, char *last_puts_char, char last_gets_char)
936{
937 char lpc = *last_puts_char;
938
939
940
941 if (lpc != '\n' && lpc != '\0') {
942 fputc('\n', file);
943 lpc = '\n';
944 }
945 fputs(s, file);
946
947
948 if (s[0])
949 lpc = 'x';
950
951
952 if (last_gets_char == LAST_IS_NUL) {
953 fputc('\0', file);
954 lpc = 'x';
955 } else
956
957 if (last_gets_char != NO_EOL_CHAR) {
958 fputc(last_gets_char, file);
959 lpc = last_gets_char;
960 }
961
962 if (ferror(file)) {
963 xfunc_error_retval = 4;
964 bb_simple_error_msg_and_die(bb_msg_write_error);
965 }
966 *last_puts_char = lpc;
967}
968
969static void flush_append(char *last_puts_char)
970{
971 char *data;
972
973
974 while ((data = (char *)llist_pop(&G.append_head)) != NULL) {
975
976
977
978
979
980
981
982
983
984 puts_maybe_newline(data, G.nonstdout, last_puts_char, '\n');
985 free(data);
986 }
987}
988
989
990
991
992static char *get_next_line(char *gets_char, char *last_puts_char)
993{
994 char *temp = NULL;
995 size_t len;
996 char gc;
997
998 flush_append(last_puts_char);
999
1000
1001
1002 gc = NO_EOL_CHAR;
1003 for (; G.current_input_file <= G.last_input_file; G.current_input_file++) {
1004 FILE *fp = G.current_fp;
1005 if (!fp) {
1006 const char *path = G.input_file_list[G.current_input_file];
1007 fp = stdin;
1008 if (path != bb_msg_standard_input) {
1009 fp = fopen_or_warn(path, "r");
1010 if (!fp) {
1011 G.exitcode = EXIT_FAILURE;
1012 continue;
1013 }
1014 }
1015 G.current_fp = fp;
1016 }
1017
1018
1019
1020 temp = bb_get_chunk_from_file(fp, &len);
1021 if (temp) {
1022
1023 char c = temp[len-1];
1024 if (c == '\n' || c == '\0') {
1025 temp[len-1] = '\0';
1026 gc = c;
1027 if (c == '\0') {
1028 int ch = fgetc(fp);
1029 if (ch != EOF)
1030 ungetc(ch, fp);
1031 else
1032 gc = LAST_IS_NUL;
1033 }
1034 }
1035
1036 break;
1037
1038
1039
1040
1041
1042
1043
1044
1045 }
1046
1047 fclose_if_not_stdin(fp);
1048 G.current_fp = NULL;
1049 }
1050 *gets_char = gc;
1051 return temp;
1052}
1053
/*
 * Write string s to the current output, terminated as selected by n
 * (see puts_maybe_newline).  Relies on a variable named last_puts_char
 * being in scope at the point of use.
 */
#define sed_puts(s, n) (puts_maybe_newline(s, G.nonstdout, &last_puts_char, n))
1055
1056static int beg_match(sed_cmd_t *sed_cmd, const char *pattern_space)
1057{
1058 int retval = sed_cmd->beg_match && !regexec(sed_cmd->beg_match, pattern_space, 0, NULL, 0);
1059 if (retval)
1060 G.previous_regex_ptr = sed_cmd->beg_match;
1061 return retval;
1062}
1063
1064
1065
1066static void process_files(void)
1067{
1068 char *pattern_space, *next_line;
1069 int linenum = 0;
1070 char last_puts_char = '\n';
1071 char last_gets_char, next_gets_char;
1072 sed_cmd_t *sed_cmd;
1073 int substituted;
1074
1075
1076 next_line = get_next_line(&next_gets_char, &last_puts_char);
1077
1078
1079 again:
1080 substituted = 0;
1081
1082
1083 pattern_space = next_line;
1084 if (!pattern_space)
1085 return;
1086 last_gets_char = next_gets_char;
1087
1088
1089
1090 next_line = get_next_line(&next_gets_char, &last_puts_char);
1091 linenum++;
1092
1093
1094 restart:
1095 for (sed_cmd = G.sed_cmd_head; sed_cmd; sed_cmd = sed_cmd->next) {
1096 int old_matched, matched;
1097
1098 old_matched = sed_cmd->in_match;
1099 if (!old_matched)
1100 sed_cmd->end_line = sed_cmd->end_line_orig;
1101
1102
1103
1104 dbg("match1:%d", sed_cmd->in_match);
1105 dbg("match2:%d", (!sed_cmd->beg_line && !sed_cmd->end_line
1106 && !sed_cmd->beg_match && !sed_cmd->end_match));
1107 dbg("match3:%d", (sed_cmd->beg_line > 0
1108 && (sed_cmd->end_line || sed_cmd->end_match
1109 ? (sed_cmd->beg_line <= linenum)
1110 : (sed_cmd->beg_line == linenum)
1111 )
1112 ));
1113 dbg("match4:%d", (beg_match(sed_cmd, pattern_space)));
1114 dbg("match5:%d", (sed_cmd->beg_line == -1 && next_line == NULL));
1115
1116
1117 sed_cmd->in_match = sed_cmd->in_match
1118
1119 || (!sed_cmd->beg_line && !sed_cmd->end_line
1120 && !sed_cmd->beg_match && !sed_cmd->end_match)
1121
1122 || (sed_cmd->beg_line > 0
1123 && (sed_cmd->end_line || sed_cmd->end_match
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133 ? (sed_cmd->beg_line <= linenum)
1134 : (sed_cmd->beg_line == linenum)
1135 )
1136 )
1137
1138 || (beg_match(sed_cmd, pattern_space))
1139
1140 || (sed_cmd->beg_line == -1 && next_line == NULL);
1141
1142
1143 matched = sed_cmd->in_match;
1144
1145 dbg("cmd:'%c' matched:%d beg_line:%d end_line:%d linenum:%d",
1146 sed_cmd->cmd, matched, sed_cmd->beg_line, sed_cmd->end_line, linenum);
1147
1148
1149
1150 if (matched) {
1151 if (sed_cmd->end_line <= -2) {
1152
1153 sed_cmd->end_line = linenum + (-sed_cmd->end_line - 2);
1154 }
1155
1156 if (sed_cmd->beg_line > 0) {
1157 sed_cmd->beg_line = -2;
1158 }
1159 dbg("end1:%d", sed_cmd->end_line ? sed_cmd->end_line == -1
1160 ? !next_line : (sed_cmd->end_line <= linenum)
1161 : !sed_cmd->end_match);
1162 dbg("end2:%d", sed_cmd->end_match && old_matched
1163 && !regexec(sed_cmd->end_match,pattern_space, 0, NULL, 0));
1164 sed_cmd->in_match = !(
1165
1166 (sed_cmd->end_line
1167 ? sed_cmd->end_line == -1
1168 ? !next_line
1169 : (sed_cmd->end_line <= linenum)
1170 : !sed_cmd->end_match
1171 )
1172
1173 || (sed_cmd->end_match && old_matched
1174 && (regexec(sed_cmd->end_match,
1175 pattern_space, 0, NULL, 0) == 0)
1176 )
1177 );
1178 }
1179
1180
1181 if (sed_cmd->cmd == '{') {
1182 if (sed_cmd->invert ? matched : !matched) {
1183 unsigned nest_cnt = 0;
1184 while (1) {
1185 if (sed_cmd->cmd == '{')
1186 nest_cnt++;
1187 if (sed_cmd->cmd == '}') {
1188 nest_cnt--;
1189 if (nest_cnt == 0)
1190 break;
1191 }
1192 sed_cmd = sed_cmd->next;
1193 if (!sed_cmd)
1194 bb_simple_error_msg_and_die("unterminated {");
1195 }
1196 }
1197 continue;
1198 }
1199
1200
1201 if (sed_cmd->invert ? matched : !matched)
1202 continue;
1203
1204
1205 if (sed_cmd->beg_match) {
1206 G.previous_regex_ptr = sed_cmd->beg_match;
1207 }
1208
1209
1210 dbg("pattern_space:'%s' next_line:'%s' cmd:%c",
1211 pattern_space, next_line, sed_cmd->cmd);
1212 switch (sed_cmd->cmd) {
1213
1214
1215 case '=':
1216 fprintf(G.nonstdout, "%d\n", linenum);
1217 break;
1218
1219
1220 case 'P':
1221 {
1222 char *tmp = strchr(pattern_space, '\n');
1223 if (tmp) {
1224 *tmp = '\0';
1225
1226 sed_puts(pattern_space, '\n');
1227 *tmp = '\n';
1228 break;
1229 }
1230
1231 }
1232
1233
1234 case 'p':
1235
1236
1237
1238
1239 sed_puts(pattern_space, '\n');
1240 break;
1241
1242 case 'D':
1243 {
1244 char *tmp = strchr(pattern_space, '\n');
1245 if (tmp) {
1246 overlapping_strcpy(pattern_space, tmp + 1);
1247 goto restart;
1248 }
1249 }
1250
1251 case 'd':
1252 goto discard_line;
1253
1254
1255 case 's':
1256 if (!do_subst_command(sed_cmd, &pattern_space))
1257 break;
1258 dbg("do_subst_command succeeded:'%s'", pattern_space);
1259 substituted |= 1;
1260
1261
1262 if (sed_cmd->sub_p)
1263 sed_puts(pattern_space, last_gets_char);
1264
1265 if (sed_cmd->sw_file)
1266 puts_maybe_newline(
1267 pattern_space, sed_cmd->sw_file,
1268 &sed_cmd->sw_last_char, last_gets_char);
1269 break;
1270
1271
1272 case 'a':
1273 append(xstrdup(sed_cmd->string));
1274 break;
1275
1276
1277 case 'i':
1278 sed_puts(sed_cmd->string, '\n');
1279 break;
1280
1281
1282 case 'c':
1283
1284 if (!sed_cmd->in_match)
1285 sed_puts(sed_cmd->string, '\n');
1286 goto discard_line;
1287
1288
1289 case 'r':
1290 {
1291 FILE *rfile;
1292 rfile = fopen_for_read(sed_cmd->string);
1293 if (rfile) {
1294 char *line;
1295 while ((line = xmalloc_fgetline(rfile))
1296 != NULL)
1297 append(line);
1298 fclose(rfile);
1299 }
1300
1301 break;
1302 }
1303
1304
1305 case 'w':
1306 puts_maybe_newline(
1307 pattern_space, sed_cmd->sw_file,
1308 &sed_cmd->sw_last_char, last_gets_char);
1309 break;
1310
1311
1312 case 'n':
1313 if (!G.be_quiet)
1314 sed_puts(pattern_space, last_gets_char);
1315 if (next_line == NULL) {
1316
1317 goto discard_line;
1318 }
1319 free(pattern_space);
1320 pattern_space = next_line;
1321 last_gets_char = next_gets_char;
1322 next_line = get_next_line(&next_gets_char, &last_puts_char);
1323 substituted = 0;
1324 linenum++;
1325 break;
1326
1327
1328 case 'q':
1329
1330 free(next_line);
1331 next_line = NULL;
1332 goto discard_commands;
1333
1334
1335 case 'N':
1336 {
1337 int len;
1338
1339
1340
1341
1342
1343
1344
1345
1346 if (next_line == NULL) {
1347
1348 goto discard_commands;
1349 }
1350
1351 len = strlen(pattern_space);
1352 pattern_space = xrealloc(pattern_space, len + strlen(next_line) + 2);
1353 pattern_space[len] = '\n';
1354 strcpy(pattern_space + len+1, next_line);
1355 last_gets_char = next_gets_char;
1356 next_line = get_next_line(&next_gets_char, &last_puts_char);
1357 linenum++;
1358 break;
1359 }
1360
1361
1362 case 't':
1363 if (!substituted) break;
1364 substituted = 0;
1365
1366
1367 case 'T':
1368 if (substituted) break;
1369
1370
1371 case 'b':
1372 if (!sed_cmd->string) goto discard_commands;
1373 else sed_cmd = branch_to(sed_cmd->string);
1374 break;
1375
1376 case 'y':
1377 {
1378 int i, j;
1379 for (i = 0; pattern_space[i]; i++) {
1380 for (j = 0; sed_cmd->string[j]; j += 2) {
1381 if (pattern_space[i] == sed_cmd->string[j]) {
1382 pattern_space[i] = sed_cmd->string[j + 1];
1383 break;
1384 }
1385 }
1386 }
1387
1388 break;
1389 }
1390 case 'g':
1391 free(pattern_space);
1392 pattern_space = xstrdup(G.hold_space ? G.hold_space : "");
1393 break;
1394 case 'G':
1395 {
1396 int pattern_space_size = 2;
1397 int hold_space_size = 0;
1398
1399 if (pattern_space)
1400 pattern_space_size += strlen(pattern_space);
1401 if (G.hold_space)
1402 hold_space_size = strlen(G.hold_space);
1403 pattern_space = xrealloc(pattern_space,
1404 pattern_space_size + hold_space_size);
1405 if (pattern_space_size == 2)
1406 pattern_space[0] = 0;
1407 strcat(pattern_space, "\n");
1408 if (G.hold_space)
1409 strcat(pattern_space, G.hold_space);
1410 last_gets_char = '\n';
1411
1412 break;
1413 }
1414 case 'h':
1415 free(G.hold_space);
1416 G.hold_space = xstrdup(pattern_space);
1417 break;
1418 case 'H':
1419 {
1420 int hold_space_size = 2;
1421 int pattern_space_size = 0;
1422
1423 if (G.hold_space)
1424 hold_space_size += strlen(G.hold_space);
1425 if (pattern_space)
1426 pattern_space_size = strlen(pattern_space);
1427 G.hold_space = xrealloc(G.hold_space,
1428 hold_space_size + pattern_space_size);
1429
1430 if (hold_space_size == 2)
1431 *G.hold_space = 0;
1432 strcat(G.hold_space, "\n");
1433 if (pattern_space)
1434 strcat(G.hold_space, pattern_space);
1435
1436 break;
1437 }
1438 case 'x':
1439 {
1440 char *tmp = pattern_space;
1441 pattern_space = G.hold_space ? G.hold_space : xzalloc(1);
1442 last_gets_char = '\n';
1443 G.hold_space = tmp;
1444 break;
1445 }
1446 }
1447 }
1448
1449
1450
1451
1452 discard_commands:
1453
1454
1455 if (!G.be_quiet)
1456 sed_puts(pattern_space, last_gets_char);
1457
1458
1459 discard_line:
1460 flush_append(&last_puts_char );
1461 free(pattern_space);
1462
1463 goto again;
1464}
1465
1466
1467
1468
1469
1470
1471
/*
 * Feed a script that may contain several newline-separated lines to
 * add_cmd(), one line at a time.  CMDSTR itself is not modified: the
 * splitting is done on a private copy.
 */
static void add_cmd_block(char *cmdstr)
{
	char *sv, *eol;

	cmdstr = sv = xstrdup(cmdstr);
	for (;;) {
		eol = strchr(cmdstr, '\n');
		if (!eol) {
			/* Last (or only) line */
			add_cmd(cmdstr);
			break;
		}
		*eol = '\0';
		add_cmd(cmdstr);
		/* Advance only while a newline was found; computing
		 * "eol + 1" from a NULL eol is undefined */
		cmdstr = eol + 1;
	}
	free(sv);
}
1486
1487int sed_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1488int sed_main(int argc UNUSED_PARAM, char **argv)
1489{
1490 unsigned opt;
1491 llist_t *opt_e, *opt_f;
1492 char *opt_i;
1493
1494#if ENABLE_LONG_OPTS
1495 static const char sed_longopts[] ALIGN1 =
1496
1497 "in-place\0" Optional_argument "i"
1498 "regexp-extended\0" No_argument "r"
1499 "quiet\0" No_argument "n"
1500 "silent\0" No_argument "n"
1501 "expression\0" Required_argument "e"
1502 "file\0" Required_argument "f";
1503#endif
1504
1505 INIT_G();
1506
1507
1508 if (ENABLE_FEATURE_CLEAN_UP) atexit(sed_free_and_close_stuff);
1509
1510
1511 if (argv[1] && strcmp(argv[1], "--version") == 0) {
1512 puts("This is not GNU sed version 4.0");
1513 return 0;
1514 }
1515
1516
1517 opt_e = opt_f = NULL;
1518 opt_i = NULL;
1519
1520
1521
1522
1523
1524 opt = getopt32long(argv, "^"
1525 "i::rEne:*f:*"
1526 "\0" "nn",
1527 sed_longopts,
1528 &opt_i, &opt_e, &opt_f,
1529 &G.be_quiet);
1530
1531 argv += optind;
1532 if (opt & OPT_in_place) {
1533 die_func = cleanup_outname;
1534 }
1535 if (opt & (2|4))
1536 G.regex_type |= REG_EXTENDED;
1537
1538
1539 while (opt_e) {
1540 add_cmd_block(llist_pop(&opt_e));
1541 }
1542 while (opt_f) {
1543 char *line;
1544 FILE *cmdfile;
1545 cmdfile = xfopen_stdin(llist_pop(&opt_f));
1546 while ((line = xmalloc_fgetline(cmdfile)) != NULL) {
1547 add_cmd(line);
1548 free(line);
1549 }
1550 fclose_if_not_stdin(cmdfile);
1551 }
1552
1553 if (!(opt & 0x30)) {
1554 if (!*argv)
1555 bb_show_usage();
1556 add_cmd_block(*argv++);
1557 }
1558
1559 add_cmd("");
1560
1561
1562 G.nonstdout = stdout;
1563
1564
1565
1566
1567 G.input_file_list = argv;
1568 if (!argv[0]) {
1569 if (opt & OPT_in_place)
1570 bb_error_msg_and_die(bb_msg_requires_arg, "-i");
1571 argv[0] = (char*)bb_msg_standard_input;
1572
1573 } else {
1574 goto start;
1575
1576 for (; *argv; argv++) {
1577 struct stat statbuf;
1578 int nonstdoutfd;
1579 sed_cmd_t *sed_cmd;
1580
1581 G.last_input_file++;
1582 start:
1583 if (!(opt & OPT_in_place)) {
1584 if (LONE_DASH(*argv)) {
1585 *argv = (char*)bb_msg_standard_input;
1586 process_files();
1587 }
1588 continue;
1589 }
1590
1591
1592
1593 if (stat(*argv, &statbuf) != 0) {
1594 bb_simple_perror_msg(*argv);
1595 G.exitcode = EXIT_FAILURE;
1596 G.current_input_file++;
1597 continue;
1598 }
1599 G.outname = xasprintf("%sXXXXXX", *argv);
1600 nonstdoutfd = xmkstemp(G.outname);
1601 G.nonstdout = xfdopen_for_write(nonstdoutfd);
1602
1603
1604
1605 fchmod(nonstdoutfd, statbuf.st_mode);
1606 fchown(nonstdoutfd, statbuf.st_uid, statbuf.st_gid);
1607
1608 process_files();
1609 fclose(G.nonstdout);
1610 G.nonstdout = stdout;
1611
1612 if (opt_i) {
1613 char *backupname = xasprintf("%s%s", *argv, opt_i);
1614 xrename(*argv, backupname);
1615 free(backupname);
1616 }
1617
1618 xrename(G.outname, *argv);
1619 free(G.outname);
1620 G.outname = NULL;
1621
1622
1623 for (sed_cmd = G.sed_cmd_head; sed_cmd; sed_cmd = sed_cmd->next) {
1624 sed_cmd->beg_line = sed_cmd->beg_line_orig;
1625 sed_cmd->end_line = sed_cmd->end_line_orig;
1626 }
1627 }
1628
1629
1630
1631
1632
1633 }
1634
1635 process_files();
1636
1637 return G.exitcode;
1638}
1639