busybox/findutils/grep.c
<<
>>
Prefs
   1/* vi: set sw=4 ts=4: */
   2/*
   3 * Mini grep implementation for busybox using libc regex.
   4 *
   5 * Copyright (C) 1999,2000,2001 by Lineo, inc. and Mark Whitley
   6 * Copyright (C) 1999,2000,2001 by Mark Whitley <markw@codepoet.org>
   7 *
   8 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
   9 */
/* BB_AUDIT SUSv3 defects - formerly listed -x "match whole line only" as unsupported; it is now implemented (OPT_x). */
  11/* BB_AUDIT GNU defects - always acts as -a.  */
  12/* http://www.opengroup.org/onlinepubs/007904975/utilities/grep.html */
  13/*
  14 * 2004,2006 (C) Vladimir Oleynik <dzo@simtreas.ru> -
  15 * correction "-e pattern1 -e pattern2" logic and more optimizations.
  16 * precompiled regex
  17 *
  18 * (C) 2006 Jac Goudsmit added -o option
  19 */
  20//config:config GREP
  21//config:       bool "grep (8.6 kb)"
  22//config:       default y
  23//config:       help
  24//config:       grep is used to search files for a specified pattern.
  25//config:
  26//config:config EGREP
  27//config:       bool "egrep (7.8 kb)"
  28//config:       default y
  29//config:       help
  30//config:       Alias to "grep -E".
  31//config:
  32//config:config FGREP
  33//config:       bool "fgrep (7.8 kb)"
  34//config:       default y
  35//config:       help
  36//config:       Alias to "grep -F".
  37//config:
  38//config:config FEATURE_GREP_CONTEXT
  39//config:       bool "Enable before and after context flags (-A, -B and -C)"
  40//config:       default y
  41//config:       depends on GREP || EGREP || FGREP
  42//config:       help
  43//config:       Print the specified number of leading (-B) and/or trailing (-A)
  44//config:       context surrounding our matching lines.
  45//config:       Print the specified number of context lines (-C).
  46
  47//applet:IF_GREP(APPLET(grep, BB_DIR_BIN, BB_SUID_DROP))
  48//                APPLET_ODDNAME:name   main  location    suid_type     help
  49//applet:IF_EGREP(APPLET_ODDNAME(egrep, grep, BB_DIR_BIN, BB_SUID_DROP, egrep))
  50//applet:IF_FGREP(APPLET_ODDNAME(fgrep, grep, BB_DIR_BIN, BB_SUID_DROP, fgrep))
  51
  52//kbuild:lib-$(CONFIG_GREP) += grep.o
  53//kbuild:lib-$(CONFIG_EGREP) += grep.o
  54//kbuild:lib-$(CONFIG_FGREP) += grep.o
  55
  56#include "libbb.h"
  57#include "common_bufsiz.h"
  58#include "xregex.h"
  59
  60//usage:#define grep_trivial_usage
  61//usage:       "[-HhnlLoqvsrRiwFE"
  62//usage:        IF_EXTRA_COMPAT("z")
  63//usage:       "] [-m N] "
  64//usage:        IF_FEATURE_GREP_CONTEXT("[-A|B|C N] ")
  65//usage:       "{ PATTERN | -e PATTERN... | -f FILE... } [FILE]..."
  66//usage:#define grep_full_usage "\n\n"
  67//usage:       "Search for PATTERN in FILEs (or stdin)\n"
  68//usage:     "\n        -H      Add 'filename:' prefix"
  69//usage:     "\n        -h      Do not add 'filename:' prefix"
  70//usage:     "\n        -n      Add 'line_no:' prefix"
  71//usage:     "\n        -l      Show only names of files that match"
  72//usage:     "\n        -L      Show only names of files that don't match"
  73//usage:     "\n        -c      Show only count of matching lines"
  74//usage:     "\n        -o      Show only the matching part of line"
  75//usage:     "\n        -q      Quiet. Return 0 if PATTERN is found, 1 otherwise"
  76//usage:     "\n        -v      Select non-matching lines"
  77//usage:     "\n        -s      Suppress open and read errors"
  78//usage:     "\n        -r      Recurse"
  79//usage:     "\n        -R      Recurse and dereference symlinks"
  80//usage:     "\n        -i      Ignore case"
  81//usage:     "\n        -w      Match whole words only"
  82//usage:     "\n        -x      Match whole lines only"
  83//usage:     "\n        -F      PATTERN is a literal (not regexp)"
  84//usage:     "\n        -E      PATTERN is an extended regexp"
  85//usage:        IF_EXTRA_COMPAT(
  86//usage:     "\n        -z      NUL terminated input"
  87//usage:        )
  88//usage:     "\n        -m N    Match up to N times per file"
  89//usage:        IF_FEATURE_GREP_CONTEXT(
  90//usage:     "\n        -A N    Print N lines of trailing context"
  91//usage:     "\n        -B N    Print N lines of leading context"
  92//usage:     "\n        -C N    Same as '-A N -B N'"
  93//usage:        )
  94//usage:     "\n        -e PTRN Pattern to match"
  95//usage:     "\n        -f FILE Read pattern from file"
  96//usage:
  97//usage:#define grep_example_usage
  98//usage:       "$ grep root /etc/passwd\n"
  99//usage:       "root:x:0:0:root:/root:/bin/bash\n"
 100//usage:       "$ grep ^[rR]oo. /etc/passwd\n"
 101//usage:       "root:x:0:0:root:/root:/bin/bash\n"
 102//usage:
 103//usage:#define egrep_trivial_usage NOUSAGE_STR
 104//usage:#define egrep_full_usage ""
 105//usage:#define fgrep_trivial_usage NOUSAGE_STR
 106//usage:#define fgrep_full_usage ""
 107
 108/* -e,-f are lists; -m,-A,-B,-C have numeric param */
 109#define OPTSTR_GREP \
 110        "lnqvscFiHhe:*f:*LorRm:+wx" \
 111        IF_FEATURE_GREP_CONTEXT("A:+B:+C:+") \
 112        "E" \
 113        IF_EXTRA_COMPAT("z") \
 114        "aI"
 115/* ignored: -a "assume all files to be text" */
 116/* ignored: -I "assume binary files have no matches" */
 117enum {
 118        OPTBIT_l, /* list matched file names only */
 119        OPTBIT_n, /* print line# */
 120        OPTBIT_q, /* quiet - exit(EXIT_SUCCESS) of first match */
 121        OPTBIT_v, /* invert the match, to select non-matching lines */
 122        OPTBIT_s, /* suppress errors about file open errors */
 123        OPTBIT_c, /* count matches per file (suppresses normal output) */
 124        OPTBIT_F, /* literal match */
 125        OPTBIT_i, /* case-insensitive */
 126        OPTBIT_H, /* force filename display */
 127        OPTBIT_h, /* inhibit filename display */
 128        OPTBIT_e, /* -e PATTERN */
 129        OPTBIT_f, /* -f FILE_WITH_PATTERNS */
 130        OPTBIT_L, /* list unmatched file names only */
 131        OPTBIT_o, /* show only matching parts of lines */
 132        OPTBIT_r, /* recurse dirs */
 133        OPTBIT_R, /* recurse dirs and symlinks to dirs */
 134        OPTBIT_m, /* -m MAX_MATCHES */
 135        OPTBIT_w, /* -w whole word match */
 136        OPTBIT_x, /* -x whole line match */
 137        IF_FEATURE_GREP_CONTEXT(    OPTBIT_A ,) /* -A NUM: after-match context */
 138        IF_FEATURE_GREP_CONTEXT(    OPTBIT_B ,) /* -B NUM: before-match context */
 139        IF_FEATURE_GREP_CONTEXT(    OPTBIT_C ,) /* -C NUM: -A and -B combined */
 140        OPTBIT_E, /* extended regexp */
 141        IF_EXTRA_COMPAT(            OPTBIT_z ,) /* input is NUL terminated */
 142        OPT_l = 1 << OPTBIT_l,
 143        OPT_n = 1 << OPTBIT_n,
 144        OPT_q = 1 << OPTBIT_q,
 145        OPT_v = 1 << OPTBIT_v,
 146        OPT_s = 1 << OPTBIT_s,
 147        OPT_c = 1 << OPTBIT_c,
 148        OPT_F = 1 << OPTBIT_F,
 149        OPT_i = 1 << OPTBIT_i,
 150        OPT_H = 1 << OPTBIT_H,
 151        OPT_h = 1 << OPTBIT_h,
 152        OPT_e = 1 << OPTBIT_e,
 153        OPT_f = 1 << OPTBIT_f,
 154        OPT_L = 1 << OPTBIT_L,
 155        OPT_o = 1 << OPTBIT_o,
 156        OPT_r = 1 << OPTBIT_r,
 157        OPT_R = 1 << OPTBIT_R,
 158        OPT_m = 1 << OPTBIT_m,
 159        OPT_w = 1 << OPTBIT_w,
 160        OPT_x = 1 << OPTBIT_x,
 161        OPT_A = IF_FEATURE_GREP_CONTEXT(    (1 << OPTBIT_A)) + 0,
 162        OPT_B = IF_FEATURE_GREP_CONTEXT(    (1 << OPTBIT_B)) + 0,
 163        OPT_C = IF_FEATURE_GREP_CONTEXT(    (1 << OPTBIT_C)) + 0,
 164        OPT_E = 1 << OPTBIT_E,
 165        OPT_z = IF_EXTRA_COMPAT(            (1 << OPTBIT_z)) + 0,
 166};
 167
 168#define PRINT_LINE_NUM              (option_mask32 & OPT_n)
 169#define BE_QUIET                    (option_mask32 & OPT_q)
 170#define SUPPRESS_ERR_MSGS           (option_mask32 & OPT_s)
 171#define PRINT_MATCH_COUNTS          (option_mask32 & OPT_c)
 172#define FGREP_FLAG                  (option_mask32 & OPT_F)
 173#define NUL_DELIMITED               (option_mask32 & OPT_z)
 174
 175struct globals {
 176        int max_matches;
 177#if !ENABLE_EXTRA_COMPAT
 178        int reflags;
 179#else
 180        RE_TRANSLATE_TYPE case_fold; /* RE_TRANSLATE_TYPE is [[un]signed] char* */
 181#endif
 182        smalluint invert_search;
 183        smalluint print_filename;
 184        smalluint open_errors;
 185#if ENABLE_FEATURE_GREP_CONTEXT
 186        smalluint did_print_line;
 187        int lines_before;
 188        int lines_after;
 189        char **before_buf;
 190        IF_EXTRA_COMPAT(size_t *before_buf_size;)
 191        int last_line_printed;
 192#endif
 193        /* globals used internally */
 194        llist_t *pattern_head;   /* growable list of patterns to match */
 195        const char *cur_file;    /* the current file we are reading */
 196} FIX_ALIASING;
 197#define G (*(struct globals*)bb_common_bufsiz1)
 198#define INIT_G() do { \
 199        setup_common_bufsiz(); \
 200        BUILD_BUG_ON(sizeof(G) > COMMON_BUFSIZE); \
 201} while (0)
 202#define max_matches       (G.max_matches         )
 203#if !ENABLE_EXTRA_COMPAT
 204# define reflags          (G.reflags             )
 205#else
 206# define case_fold        (G.case_fold           )
 207/* http://www.delorie.com/gnu/docs/regex/regex_46.html */
 208# define reflags           re_syntax_options
 209# undef REG_NOSUB
 210# undef REG_EXTENDED
 211# undef REG_ICASE
 212# define REG_NOSUB    bug:is:here /* should not be used */
 213/* Just RE_SYNTAX_EGREP is not enough, need to enable {n[,[m]]} too */
 214# define REG_EXTENDED (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
 215# define REG_ICASE    bug:is:here /* should not be used */
 216#endif
 217#define invert_search     (G.invert_search       )
 218#define print_filename    (G.print_filename      )
 219#define open_errors       (G.open_errors         )
 220#define did_print_line    (G.did_print_line      )
 221#define lines_before      (G.lines_before        )
 222#define lines_after       (G.lines_after         )
 223#define before_buf        (G.before_buf          )
 224#define before_buf_size   (G.before_buf_size     )
 225#define last_line_printed (G.last_line_printed   )
 226#define pattern_head      (G.pattern_head        )
 227#define cur_file          (G.cur_file            )
 228
 229
 230typedef struct grep_list_data_t {
 231        char *pattern;
 232/* for GNU regex, matched_range must be persistent across grep_file() calls */
 233#if !ENABLE_EXTRA_COMPAT
 234        regex_t compiled_regex;
 235        regmatch_t matched_range;
 236#else
 237        struct re_pattern_buffer compiled_regex;
 238        struct re_registers matched_range;
 239#endif
 240#define ALLOCATED 1
 241#define COMPILED 2
 242        int flg_mem_allocated_compiled;
 243} grep_list_data_t;
 244
 245#if !ENABLE_EXTRA_COMPAT
 246#define print_line(line, line_len, linenum, decoration) \
 247        print_line(line, linenum, decoration)
 248#endif
 249static void print_line(const char *line, size_t line_len, int linenum, char decoration)
 250{
 251#if ENABLE_FEATURE_GREP_CONTEXT
 252        /* Happens when we go to next file, immediately hit match
 253         * and try to print prev context... from prev file! Don't do it */
 254        if (linenum < 1)
 255                return;
 256        /* possibly print the little '--' separator */
 257        if ((lines_before || lines_after) && did_print_line
 258         && last_line_printed != linenum - 1
 259        ) {
 260                puts("--");
 261        }
 262        /* guard against printing "--" before first line of first file */
 263        did_print_line = 1;
 264        last_line_printed = linenum;
 265#endif
 266        if (print_filename)
 267                printf("%s%c", cur_file, decoration);
 268        if (PRINT_LINE_NUM)
 269                printf("%i%c", linenum, decoration);
 270        /* Emulate weird GNU grep behavior with -ov */
 271        if ((option_mask32 & (OPT_v|OPT_o)) != (OPT_v|OPT_o)) {
 272#if !ENABLE_EXTRA_COMPAT
 273                puts(line);
 274#else
 275                fwrite(line, 1, line_len, stdout);
 276                putchar(NUL_DELIMITED ? '\0' : '\n');
 277#endif
 278        }
 279}
 280
 281#if ENABLE_EXTRA_COMPAT
 282/* Unlike getline, this one removes trailing '\n' */
 283static ssize_t FAST_FUNC bb_getline(char **line_ptr, size_t *line_alloc_len, FILE *file)
 284{
 285        ssize_t res_sz;
 286        char *line;
 287        int delim = (NUL_DELIMITED ? '\0' : '\n');
 288
 289        res_sz = getdelim(line_ptr, line_alloc_len, delim, file);
 290        line = *line_ptr;
 291
 292        if (res_sz > 0) {
 293                if (line[res_sz - 1] == delim)
 294                        line[--res_sz] = '\0';
 295        } else {
 296                free(line); /* uclibc allocates a buffer even on EOF. WTF? */
 297        }
 298        return res_sz;
 299}
 300#endif
 301
 302static int grep_file(FILE *file)
 303{
 304        smalluint found;
 305        int linenum = 0;
 306        int nmatches = 0;
 307#if !ENABLE_EXTRA_COMPAT
 308        char *line;
 309#else
 310        char *line = NULL;
 311        ssize_t line_len;
 312        size_t line_alloc_len;
 313# define rm_so start[0]
 314# define rm_eo end[0]
 315#endif
 316#if ENABLE_FEATURE_GREP_CONTEXT
 317        int print_n_lines_after = 0;
 318        int curpos = 0; /* track where we are in the circular 'before' buffer */
 319        int idx = 0; /* used for iteration through the circular buffer */
 320#else
 321        enum { print_n_lines_after = 0 };
 322#endif
 323
 324        while (
 325#if !ENABLE_EXTRA_COMPAT
 326                (line = xmalloc_fgetline(file)) != NULL
 327#else
 328                (line_len = bb_getline(&line, &line_alloc_len, file)) >= 0
 329#endif
 330        ) {
 331                llist_t *pattern_ptr = pattern_head;
 332                grep_list_data_t *gl = gl; /* for gcc */
 333
 334                linenum++;
 335                found = 0;
 336                while (pattern_ptr) {
 337                        gl = (grep_list_data_t *)pattern_ptr->data;
 338                        if (FGREP_FLAG) {
 339                                char *match;
 340                                char *str = line;
 341 opt_f_again:
 342                                match = ((option_mask32 & OPT_i)
 343                                        ? strcasestr(str, gl->pattern)
 344                                        : strstr(str, gl->pattern)
 345                                        );
 346                                if (match) {
 347                                        if (option_mask32 & OPT_x) {
 348                                                if (match != str)
 349                                                        goto opt_f_not_found;
 350                                                if (str[strlen(gl->pattern)] != '\0')
 351                                                        goto opt_f_not_found;
 352                                        } else
 353                                        if (option_mask32 & OPT_w) {
 354                                                char c = (match != line) ? match[-1] : ' ';
 355                                                if (!isalnum(c) && c != '_') {
 356                                                        c = match[strlen(gl->pattern)];
 357                                                        if (!c || (!isalnum(c) && c != '_'))
 358                                                                goto opt_f_found;
 359                                                }
 360                                                str = match + 1;
 361                                                goto opt_f_again;
 362                                        }
 363 opt_f_found:
 364                                        found = 1;
 365 opt_f_not_found: ;
 366                                }
 367                        } else {
 368#if ENABLE_EXTRA_COMPAT
 369                                unsigned start_pos;
 370#else
 371                                int match_flg;
 372#endif
 373                                char *match_at;
 374
 375                                if (!(gl->flg_mem_allocated_compiled & COMPILED)) {
 376                                        gl->flg_mem_allocated_compiled |= COMPILED;
 377#if !ENABLE_EXTRA_COMPAT
 378                                        xregcomp(&gl->compiled_regex, gl->pattern, reflags);
 379#else
 380                                        memset(&gl->compiled_regex, 0, sizeof(gl->compiled_regex));
 381                                        gl->compiled_regex.translate = case_fold; /* for -i */
 382                                        if (re_compile_pattern(gl->pattern, strlen(gl->pattern), &gl->compiled_regex))
 383                                                bb_error_msg_and_die("bad regex '%s'", gl->pattern);
 384#endif
 385                                }
 386#if !ENABLE_EXTRA_COMPAT
 387                                gl->matched_range.rm_so = 0;
 388                                gl->matched_range.rm_eo = 0;
 389                                match_flg = 0;
 390#else
 391                                start_pos = 0;
 392#endif
 393                                match_at = line;
 394 opt_w_again:
 395//bb_error_msg("'%s' start_pos:%d line_len:%d", match_at, start_pos, line_len);
 396                                if (
 397#if !ENABLE_EXTRA_COMPAT
 398                                        regexec(&gl->compiled_regex, match_at, 1, &gl->matched_range, match_flg) == 0
 399#else
 400                                        re_search(&gl->compiled_regex, match_at, line_len,
 401                                                        start_pos, /*range:*/ line_len,
 402                                                        &gl->matched_range) >= 0
 403#endif
 404                                ) {
 405                                        if (option_mask32 & OPT_x) {
 406                                                found |= (gl->matched_range.rm_so == 0
 407                                                         && match_at[gl->matched_range.rm_eo] == '\0');
 408                                        } else
 409                                        if (!(option_mask32 & OPT_w)) {
 410                                                found = 1;
 411                                        } else {
 412                                                char c = ' ';
 413                                                if (match_at > line || gl->matched_range.rm_so != 0) {
 414                                                        c = match_at[gl->matched_range.rm_so - 1];
 415                                                }
 416                                                if (!isalnum(c) && c != '_') {
 417                                                        c = match_at[gl->matched_range.rm_eo];
 418                                                }
 419                                                if (!isalnum(c) && c != '_') {
 420                                                        found = 1;
 421                                                } else {
 422                        /*
 423                         * Why check gl->matched_range.rm_eo?
 424                         * Zero-length match makes -w skip the line:
 425                         * "echo foo | grep ^" prints "foo",
 426                         * "echo foo | grep -w ^" prints nothing.
 427                         * Without such check, we can loop forever.
 428                         */
 429#if !ENABLE_EXTRA_COMPAT
 430                                                        if (gl->matched_range.rm_eo != 0) {
 431                                                                match_at += gl->matched_range.rm_eo;
 432                                                                match_flg |= REG_NOTBOL;
 433                                                                goto opt_w_again;
 434                                                        }
 435#else
 436                                                        if (gl->matched_range.rm_eo > start_pos) {
 437                                                                start_pos = gl->matched_range.rm_eo;
 438                                                                goto opt_w_again;
 439                                                        }
 440#endif
 441                                                }
 442                                        }
 443                                }
 444                        }
 445                        /* If it's a non-inverted search, we can stop
 446                         * at first match and report it.
 447                         * If it's an inverted search, we can move on
 448                         * to the next line of input, ignoring the
 449                         * rest of the patterns.
 450                         */
 451                        if (found) {
 452                                //if (invert_search)
 453                                //      goto do_not_found;
 454                                //goto do_found;
 455                                break; // this accomplishes both
 456                        }
 457                        pattern_ptr = pattern_ptr->link;
 458                } /* while (pattern_ptr) */
 459
 460                if (found ^ invert_search) {
 461 //do_found:
 462                        /* keep track of matches */
 463                        nmatches++;
 464
 465                        /* quiet/print (non)matching file names only? */
 466                        if (option_mask32 & (OPT_q|OPT_l|OPT_L)) {
 467                                free(line); /* we don't need line anymore */
 468                                if (BE_QUIET) {
 469                                        /* manpage says about -q:
 470                                         * "exit immediately with zero status
 471                                         * if any match is found,
 472                                         * even if errors were detected" */
 473                                        exit(EXIT_SUCCESS);
 474                                }
 475                                /* -l "print filenames with matches": stop after the first match */
 476                                if (option_mask32 & OPT_l) {
 477                                        puts(cur_file);
 478                                        return 1;
 479                                }
 480                                /* -L "print filenames without matches": return early too */
 481                                return 0; /* 0: we do not print fname, hence it's "not a match" */
 482                        }
 483
 484#if ENABLE_FEATURE_GREP_CONTEXT
 485                        /* Were we printing context and saw next (unwanted) match? */
 486                        if ((option_mask32 & OPT_m) && nmatches > max_matches)
 487                                break;
 488#endif
 489
 490                        /* print the matched line */
 491                        if (PRINT_MATCH_COUNTS == 0) {
 492#if ENABLE_FEATURE_GREP_CONTEXT
 493                                int prevpos = (curpos == 0) ? lines_before - 1 : curpos - 1;
 494
 495                                /* if we were told to print 'before' lines and there is at least
 496                                 * one line in the circular buffer, print them */
 497                                if (lines_before && before_buf[prevpos] != NULL) {
 498                                        int first_buf_entry_line_num = linenum - lines_before;
 499
 500                                        /* advance to the first entry in the circular buffer, and
 501                                         * figure out the line number is of the first line in the
 502                                         * buffer */
 503                                        idx = curpos;
 504                                        while (before_buf[idx] == NULL) {
 505                                                idx = (idx + 1) % lines_before;
 506                                                first_buf_entry_line_num++;
 507                                        }
 508
 509                                        /* now print each line in the buffer, clearing them as we go */
 510                                        while (before_buf[idx] != NULL) {
 511                                                print_line(before_buf[idx], before_buf_size[idx], first_buf_entry_line_num, '-');
 512                                                free(before_buf[idx]);
 513                                                before_buf[idx] = NULL;
 514                                                idx = (idx + 1) % lines_before;
 515                                                first_buf_entry_line_num++;
 516                                        }
 517                                }
 518
 519                                /* make a note that we need to print 'after' lines */
 520                                print_n_lines_after = lines_after;
 521#endif
 522                                if (option_mask32 & OPT_o) {
 523                                        if (FGREP_FLAG) {
 524                                                /* -Fo just prints the pattern
 525                                                 * (unless -v: -Fov doesn't print anything at all) */
 526                                                if (found)
 527                                                        print_line(gl->pattern, strlen(gl->pattern), linenum, ':');
 528                                        } else while (1) {
 529                                                unsigned start = gl->matched_range.rm_so;
 530                                                unsigned end = gl->matched_range.rm_eo;
 531                                                unsigned len = end - start;
 532                                                char old = line[end];
 533                                                line[end] = '\0';
 534                                                /* Empty match is not printed: try "echo test | grep -o ''" */
 535                                                if (len != 0)
 536                                                        print_line(line + start, len, linenum, ':');
 537                                                if (old == '\0')
 538                                                        break;
 539                                                line[end] = old;
 540                                                if (len == 0)
 541                                                        end++;
 542#if !ENABLE_EXTRA_COMPAT
 543                                                if (regexec(&gl->compiled_regex, line + end,
 544                                                                1, &gl->matched_range, REG_NOTBOL) != 0)
 545                                                        break;
 546                                                gl->matched_range.rm_so += end;
 547                                                gl->matched_range.rm_eo += end;
 548#else
 549                                                if (re_search(&gl->compiled_regex, line, line_len,
 550                                                                end, line_len - end,
 551                                                                &gl->matched_range) < 0)
 552                                                        break;
 553#endif
 554                                        }
 555                                } else {
 556                                        print_line(line, line_len, linenum, ':');
 557                                }
 558                        }
 559                }
 560#if ENABLE_FEATURE_GREP_CONTEXT
 561                else { /* no match */
 562 //do_not_found:
 563                        /* if we need to print some context lines after the last match, do so */
 564                        if (print_n_lines_after) {
 565                                print_line(line, strlen(line), linenum, '-');
 566                                print_n_lines_after--;
 567                        } else if (lines_before) {
 568                                /* Add the line to the circular 'before' buffer */
 569                                free(before_buf[curpos]);
 570                                before_buf[curpos] = line;
 571                                IF_EXTRA_COMPAT(before_buf_size[curpos] = line_len;)
 572                                curpos = (curpos + 1) % lines_before;
 573                                /* avoid free(line) - we took the line */
 574                                line = NULL;
 575                        }
 576                }
 577
 578#endif /* ENABLE_FEATURE_GREP_CONTEXT */
 579#if !ENABLE_EXTRA_COMPAT
 580                free(line);
 581#endif
 582                /* Did we print all context after last requested match? */
 583                if ((option_mask32 & OPT_m)
 584                 && !print_n_lines_after
 585                 && nmatches == max_matches
 586                ) {
 587                        break;
 588                }
 589        } /* while (read line) */
 590
 591        /* special-case file post-processing for options where we don't print line
 592         * matches, just filenames and possibly match counts */
 593
 594        /* grep -c: print [filename:]count, even if count is zero */
 595        if (PRINT_MATCH_COUNTS) {
 596                if (print_filename)
 597                        printf("%s:", cur_file);
 598                printf("%d\n", nmatches);
 599        }
 600
 601        /* grep -L: "print filenames without matches" */
 602        if (option_mask32 & OPT_L) {
 603                /* nmatches is zero, no need to check it:
 604                 * we return 0 early if -L and we detect a match
 605                 */
 606                puts(cur_file);
 607                return 1; /* 1: we printed fname, hence it's "a match" */
 608        }
 609
 610        return nmatches != 0; /* we return not a count, but a boolean */
 611}
 612
 613#if ENABLE_FEATURE_CLEAN_UP
 614#define new_grep_list_data(p, m) add_grep_list_data(p, m)
 615static char *add_grep_list_data(char *pattern, int flg_used_mem)
 616#else
 617#define new_grep_list_data(p, m) add_grep_list_data(p)
 618static char *add_grep_list_data(char *pattern)
 619#endif
 620{
 621        grep_list_data_t *gl = xzalloc(sizeof(*gl));
 622        gl->pattern = pattern;
 623#if ENABLE_FEATURE_CLEAN_UP
 624        gl->flg_mem_allocated_compiled = flg_used_mem;
 625#else
 626        /*gl->flg_mem_allocated_compiled = 0;*/
 627#endif
 628        return (char *)gl;
 629}
 630
 631static void load_regexes_from_file(llist_t *fopt)
 632{
 633        while (fopt) {
 634                char *line;
 635                FILE *fp;
 636                llist_t *cur = fopt;
 637                char *ffile = cur->data;
 638
 639                fopt = cur->link;
 640                free(cur);
 641                fp = xfopen_stdin(ffile);
 642                while ((line = xmalloc_fgetline(fp)) != NULL) {
 643                        llist_add_to(&pattern_head,
 644                                new_grep_list_data(line, ALLOCATED));
 645                }
 646                fclose_if_not_stdin(fp);
 647        }
 648}
 649
 650static void load_pattern_list(llist_t **lst, char *pattern)
 651{
 652        char *p;
 653        while ((p = strsep(&pattern, "\n")) != NULL)
 654                llist_add_to(lst, new_grep_list_data(p, 0));
 655}
 656
 657static int FAST_FUNC file_action_grep(struct recursive_state *state UNUSED_PARAM,
 658                const char *filename,
 659                struct stat *statbuf)
 660{
 661        FILE *file;
 662
 663        /* If we are given a link to a directory, we should bail out now, rather
 664         * than trying to open the "file" and hoping getline gives us nothing,
 665         * since that is not portable across operating systems (FreeBSD for
 666         * example will return the raw directory contents). */
 667        if (S_ISLNK(statbuf->st_mode)) {
 668                struct stat sb;
 669                if (stat(filename, &sb) != 0) {
 670                        if (!SUPPRESS_ERR_MSGS)
 671                                bb_simple_perror_msg(filename);
 672                        return 0;
 673                }
 674                if (S_ISDIR(sb.st_mode))
 675                        return 1;
 676        }
 677
 678        file = fopen_for_read(filename);
 679        if (file == NULL) {
 680                if (!SUPPRESS_ERR_MSGS)
 681                        bb_simple_perror_msg(filename);
 682                open_errors = 1;
 683                return 0;
 684        }
 685        cur_file = filename;
 686        *(int*)state->userData |= grep_file(file);
 687        fclose(file);
 688        return 1;
 689}
 690
 691static int grep_dir(const char *dir)
 692{
 693        int matched = 0;
 694        recursive_action(dir, 0
 695                | ACTION_RECURSE
 696                | ((option_mask32 & OPT_R) ? ACTION_FOLLOWLINKS : 0)
 697                | ACTION_FOLLOWLINKS_L0 /* grep -r ... SYMLINK follows it */
 698                | ACTION_DEPTHFIRST
 699                | 0,
 700                /* fileAction= */ file_action_grep,
 701                /* dirAction= */ NULL,
 702                /* userData= */ &matched
 703        );
 704        return matched;
 705}
 706
 707int grep_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
 708int grep_main(int argc UNUSED_PARAM, char **argv)
 709{
 710        FILE *file;
 711        int matched;
 712        llist_t *fopt = NULL;
 713#if ENABLE_FEATURE_GREP_CONTEXT
 714        int Copt, opts;
 715#endif
 716        INIT_G();
 717
 718        /* For grep, exitcode of 1 is "not found". Other errors are 2: */
 719        xfunc_error_retval = 2;
 720
 721        /* do normal option parsing */
 722#if ENABLE_FEATURE_GREP_CONTEXT
 723        /* -H unsets -h; -C unsets -A,-B */
 724        opts = getopt32long(argv, "^"
 725                OPTSTR_GREP
 726                        "\0"
 727                        "H-h:C-AB",
 728                "color\0" Optional_argument "\xff",
 729                &pattern_head, &fopt, &max_matches,
 730                &lines_after, &lines_before, &Copt
 731                , NULL
 732        );
 733
 734        if (opts & OPT_C) {
 735                /* -C unsets prev -A and -B, but following -A or -B
 736                 * may override it */
 737                if (!(opts & OPT_A)) /* not overridden */
 738                        lines_after = Copt;
 739                if (!(opts & OPT_B)) /* not overridden */
 740                        lines_before = Copt;
 741        }
 742        /* sanity checks */
 743        if (opts & (OPT_c|OPT_q|OPT_l|OPT_L)) {
 744                option_mask32 &= ~OPT_n;
 745                lines_before = 0;
 746                lines_after = 0;
 747        } else if (lines_before > 0) {
 748                if (lines_before > INT_MAX / sizeof(long long))
 749                        lines_before = INT_MAX / sizeof(long long);
 750                /* overflow in (lines_before * sizeof(x)) is prevented (above) */
 751                before_buf = xzalloc(lines_before * sizeof(before_buf[0]));
 752                IF_EXTRA_COMPAT(before_buf_size = xzalloc(lines_before * sizeof(before_buf_size[0]));)
 753        }
 754#else
 755        /* with auto sanity checks */
 756        getopt32(argv, "^" OPTSTR_GREP "\0" "H-h:c-n:q-n:l-n:", // why trailing ":"?
 757                &pattern_head, &fopt, &max_matches);
 758#endif
 759        invert_search = ((option_mask32 & OPT_v) != 0); /* 0 | 1 */
 760
 761        {       /* convert char **argv to pattern_list */
 762                llist_t *cur, *new = NULL;
 763                for (cur = pattern_head; cur; cur = cur->link)
 764                        load_pattern_list(&new, cur->data);
 765                llist_free(pattern_head, NULL);
 766                pattern_head = new;
 767        }
 768        if (option_mask32 & OPT_f) {
 769                load_regexes_from_file(fopt);
 770                if (!pattern_head) { /* -f EMPTY_FILE? */
 771                        /* GNU grep treats it as "nothing matches" except when -x */
 772                        const char *data = (option_mask32 & OPT_x) ? ".*" : "";
 773                        llist_add_to(&pattern_head, new_grep_list_data((char*)data, 0));
 774                        invert_search ^= 1;
 775                }
 776        }
 777
 778        if (ENABLE_FGREP && applet_name[0] == 'f')
 779                option_mask32 |= OPT_F;
 780
 781#if !ENABLE_EXTRA_COMPAT
 782        if (!(option_mask32 & (OPT_o | OPT_w | OPT_x)))
 783                reflags = REG_NOSUB;
 784#endif
 785
 786        if ((ENABLE_EGREP && applet_name[0] == 'e')
 787         || (option_mask32 & OPT_E)
 788        ) {
 789                reflags |= REG_EXTENDED;
 790        }
 791#if ENABLE_EXTRA_COMPAT
 792        else {
 793                reflags = RE_SYNTAX_GREP;
 794        }
 795#endif
 796
 797        if (option_mask32 & OPT_i) {
 798#if !ENABLE_EXTRA_COMPAT
 799                reflags |= REG_ICASE;
 800#else
 801                int i;
 802                case_fold = xmalloc(256);
 803                for (i = 0; i < 256; i++)
 804                        case_fold[i] = (unsigned char)i;
 805                for (i = 'a'; i <= 'z'; i++)
 806                        case_fold[i] = (unsigned char)(i - ('a' - 'A'));
 807#endif
 808        }
 809
 810        argv += optind;
 811
 812        /* if we didn't get a pattern from -e and no command file was specified,
 813         * first parameter should be the pattern. no pattern, no worky */
 814        if (pattern_head == NULL) {
 815                if (*argv == NULL)
 816                        bb_show_usage();
 817                load_pattern_list(&pattern_head, *argv++);
 818        }
 819
 820        /* argv[0..(argc-1)] should be names of file to grep through. If
 821         * there is more than one file to grep, we will print the filenames. */
 822        if (argv[0] && argv[1])
 823                print_filename = 1;
 824        /* -H / -h of course override */
 825        if (option_mask32 & OPT_H)
 826                print_filename = 1;
 827        if (option_mask32 & OPT_h)
 828                print_filename = 0;
 829
 830        /* If no files were specified, or '-' was specified, take input from
 831         * stdin. Otherwise, we grep through all the files specified. */
 832        matched = 0;
 833        do {
 834                cur_file = *argv;
 835                file = stdin;
 836                if (!cur_file || LONE_DASH(cur_file)) {
 837                        cur_file = "(standard input)";
 838                } else {
 839                        if (option_mask32 & (OPT_r|OPT_R)) {
 840                                struct stat st;
 841                                if (stat(cur_file, &st) == 0 && S_ISDIR(st.st_mode)) {
 842                                        if (!(option_mask32 & OPT_h))
 843                                                print_filename = 1;
 844                                        matched |= grep_dir(cur_file);
 845                                        goto grep_done;
 846                                }
 847                        }
 848                        /* else: fopen(dir) will succeed, but reading won't */
 849                        file = fopen_for_read(cur_file);
 850                        if (file == NULL) {
 851                                if (!SUPPRESS_ERR_MSGS)
 852                                        bb_simple_perror_msg(cur_file);
 853                                open_errors = 1;
 854                                continue;
 855                        }
 856                }
 857                matched |= grep_file(file);
 858                fclose_if_not_stdin(file);
 859 grep_done: ;
 860        } while (*argv && *++argv);
 861
 862        /* destroy all the elements in the pattern list */
 863        if (ENABLE_FEATURE_CLEAN_UP) {
 864                while (pattern_head) {
 865                        llist_t *pattern_head_ptr = pattern_head;
 866                        grep_list_data_t *gl = (grep_list_data_t *)pattern_head_ptr->data;
 867
 868                        pattern_head = pattern_head->link;
 869                        if (gl->flg_mem_allocated_compiled & ALLOCATED)
 870                                free(gl->pattern);
 871                        if (gl->flg_mem_allocated_compiled & COMPILED)
 872                                regfree(&gl->compiled_regex);
 873                        free(gl);
 874                        free(pattern_head_ptr);
 875                }
 876        }
 877        /* 0 = success, 1 = failed, 2 = error */
 878        if (open_errors)
 879                return 2;
 880        return !matched; /* invert return value: 0 = success, 1 = failed */
 881}
 882