toybox/toys/posix/grep.c
<<
>>
Prefs
   1/* grep.c - show lines matching regular expressions
   2 *
   3 * Copyright 2013 CE Strake <strake888 at gmail.com>
   4 *
   5 * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/grep.html
   6 *
   7 * Posix doesn't even specify -r, documenting deviations from it is silly.
   8* echo hello | grep -w ''
   9* echo '' | grep -w ''
  10* echo hello | grep -f </dev/null
  11*
  12
  13USE_GREP(NEWTOY(grep, "(line-buffered)(color):;(exclude-dir)*S(exclude)*M(include)*ZzEFHIab(byte-offset)h(no-filename)ino(only-matching)rRsvwcl(files-with-matches)q(quiet)(silent)e*f*C#B#A#m#x[!wx][!EFw]", TOYFLAG_BIN|TOYFLAG_ARGFAIL(2)))
  14USE_EGREP(OLDTOY(egrep, grep, TOYFLAG_BIN|TOYFLAG_ARGFAIL(2)))
  15USE_FGREP(OLDTOY(fgrep, grep, TOYFLAG_BIN|TOYFLAG_ARGFAIL(2)))
  16
  17config GREP
  18  bool "grep"
  19  default y
  20  help
  21    usage: grep [-EFrivwcloqsHbhn] [-ABC NUM] [-m MAX] [-e REGEX]... [-MS PATTERN]... [-f REGFILE] [FILE]...
  22
  23    Show lines matching regular expressions. If no -e, first argument is
  24    regular expression to match. With no files (or "-" filename) read stdin.
  25    Returns 0 if matched, 1 if no match found, 2 for command errors.
  26
  27    -e  Regex to match. (May be repeated.)
  28    -f  File listing regular expressions to match.
  29
  30    file search:
  31    -r  Recurse into subdirectories (defaults FILE to ".")
  32    -R  Recurse into subdirectories and symlinks to directories
  33    -M  Match filename pattern (--include)
  34    -S  Skip filename pattern (--exclude)
  35    --exclude-dir=PATTERN  Skip directory pattern
  36    -I  Ignore binary files
  37
  38    match type:
  39    -A  Show NUM lines after     -B  Show NUM lines before match
  40    -C  NUM lines context (A+B)  -E  extended regex syntax
  41    -F  fixed (literal match)    -a  always text (not binary)
  42    -i  case insensitive         -m  match MAX many lines
  43    -v  invert match             -w  whole word (implies -E)
  44    -x  whole line               -z  input NUL terminated
  45
  46    display modes: (default: matched line)
  47    -c  count of matching lines  -l  show only matching filenames
  48    -o  only matching part       -q  quiet (errors only)
  49    -s  silent (no error msg)    -Z  output NUL terminated
  50
  51    output prefix (default: filename if checking more than 1 file)
  52    -H  force filename           -b  byte offset of match
  53    -h  hide filename            -n  line number of match
  54
  55config EGREP
  56  bool
  57  default y
  58  depends on GREP
  59
  60config FGREP
  61  bool
  62  default y
  63  depends on GREP
  64*/
  65
  66#define FOR_grep
  67#include "toys.h"
  68#include <regex.h>
  69
  70GLOBALS(
  71  long m, A, B, C;
  72  struct arg_list *f, *e, *M, *S, *exclude_dir;
  73  char *color;
  74
  75  char *purple, *cyan, *red, *green, *grey;
  76  struct double_list *reg;
  77  char indelim, outdelim;
  78  int found, tried;
  79)
  80
  81struct reg {
  82  struct reg *next, *prev;
  83  int rc;
  84  regex_t r;
  85  regmatch_t m;
  86};
  87
  88static void numdash(long num, char dash)
  89{
  90  printf("%s%ld%s%c", TT.green, num, TT.cyan, dash);
  91}
  92
  93// Emit line with various potential prefixes and delimiter
  94static void outline(char *line, char dash, char *name, long lcount, long bcount,
  95  unsigned trim)
  96{
  97  if (!trim && FLAG(o)) return;
  98  if (name && FLAG(H)) printf("%s%s%s%c", TT.purple, name, TT.cyan, dash);
  99  if (FLAG(c)) {
 100    printf("%s%ld", TT.grey, lcount);
 101    xputc(TT.outdelim);
 102  } else if (lcount && FLAG(n)) numdash(lcount, dash);
 103  if (bcount && FLAG(b)) numdash(bcount-1, dash);
 104  if (line) {
 105    if (FLAG(color)) xputsn(FLAG(o) ? TT.red : TT.grey);
 106    // support embedded NUL bytes in output
 107    xputsl(line, trim);
 108    xputc(TT.outdelim);
 109  }
 110}
 111
 112// Show matches in one file
 113static void do_grep(int fd, char *name)
 114{
 115  long lcount = 0, mcount = 0, offset = 0, after = 0, before = 0;
 116  struct double_list *dlb = 0;
 117  char *bars = 0;
 118  FILE *file;
 119  int bin = 0;
 120
 121  if (!FLAG(r)) TT.tried++;
 122  if (!fd) name = "(standard input)";
 123
 124  // Only run binary file check on lseekable files.
 125  if (!FLAG(a) && !lseek(fd, 0, SEEK_CUR)) {
 126    char buf[256];
 127    int len, i = 0;
 128    wchar_t wc;
 129
 130    // If the first 256 bytes don't parse as utf8, call it binary.
 131    if (0<(len = read(fd, buf, 256))) {
 132      lseek(fd, -len, SEEK_CUR);
 133      while (i<len) {
 134        bin = utf8towc(&wc, buf+i, len-i);
 135        if (bin == -2) i = len;
 136        if (bin<1) break;
 137        i += bin;
 138      }
 139      bin = i!=len;
 140    }
 141    if (bin && FLAG(I)) return;
 142  }
 143
 144  if (!(file = fdopen(fd, "r"))) return perror_msg("%s", name);
 145
 146  // Loop through lines of input
 147  for (;;) {
 148    char *line = 0, *start;
 149    struct reg *shoe;
 150    size_t ulen;
 151    long len;
 152    int matched = 0, rc = 1;
 153
 154    // get next line, check and trim delimiter
 155    lcount++;
 156    errno = 0;
 157    ulen = len = getdelim(&line, &ulen, TT.indelim, file);
 158    if (errno) perror_msg("%s", name);
 159    if (len<1) break;
 160    if (line[ulen-1] == TT.indelim) line[--ulen] = 0;
 161
 162    // Prepare for next line
 163    start = line;
 164    if (TT.reg) for (shoe = (void *)TT.reg; shoe; shoe = shoe->next)
 165      shoe->rc = 0;
 166
 167    // Loop to handle multiple matches in same line
 168    do {
 169      regmatch_t *mm = (void *)toybuf;
 170
 171      // Handle "fixed" (literal) matches
 172      if (FLAG(F)) {
 173        struct arg_list *seek, fseek;
 174        char *s = 0;
 175
 176        for (seek = TT.e; seek; seek = seek->next) {
 177          if (FLAG(x)) {
 178            if (!(FLAG(i) ? strcasecmp : strcmp)(seek->arg, line)) s = line;
 179          } else if (!*seek->arg) {
 180            // No need to set fseek.next because this will match every line.
 181            seek = &fseek;
 182            fseek.arg = s = line;
 183          } else if (FLAG(i)) s = strcasestr(start, seek->arg);
 184          else s = strstr(start, seek->arg);
 185
 186          if (s) break;
 187        }
 188
 189        if (s) {
 190          rc = 0;
 191          mm->rm_so = (s-start);
 192          mm->rm_eo = (s-start)+strlen(seek->arg);
 193        } else rc = 1;
 194
 195      // Handle regex matches
 196      } else {
 197        int baseline = mm->rm_eo;
 198
 199        mm->rm_so = mm->rm_eo = INT_MAX;
 200        rc = 1;
 201        for (shoe = (void *)TT.reg; shoe; shoe = shoe->next) {
 202
 203          // Do we need to re-check this regex?
 204          if (!shoe->rc) {
 205            shoe->m.rm_so -= baseline;
 206            shoe->m.rm_eo -= baseline;
 207            if (!matched || shoe->m.rm_so<0)
 208              shoe->rc = regexec0(&shoe->r, start, ulen-(start-line), 1,
 209                                  &shoe->m, start==line ? 0 : REG_NOTBOL);
 210          }
 211
 212          // If we got a match, is it a _better_ match?
 213          if (!shoe->rc && (shoe->m.rm_so < mm->rm_so ||
 214              (shoe->m.rm_so == mm->rm_so && shoe->m.rm_eo >= mm->rm_eo)))
 215          {
 216            mm = &shoe->m;
 217            rc = 0;
 218          }
 219        }
 220      }
 221
 222      if (!rc && FLAG(x))
 223        if (mm->rm_so || line[mm->rm_eo]) rc = 1;
 224
 225      if (!rc && FLAG(w)) {
 226        char c = 0;
 227
 228        if ((start+mm->rm_so)!=line) {
 229          c = start[mm->rm_so-1];
 230          if (!isalnum(c) && c != '_') c = 0;
 231        }
 232        if (!c) {
 233          c = start[mm->rm_eo];
 234          if (!isalnum(c) && c != '_') c = 0;
 235        }
 236        if (c) {
 237          start += mm->rm_so+1;
 238          continue;
 239        }
 240      }
 241
 242      if (FLAG(v)) {
 243        if (FLAG(o)) {
 244          if (rc) {
 245            mm->rm_so = 0;
 246            mm->rm_eo = ulen-(start-line);
 247          } else if (!mm->rm_so) {
 248            start += mm->rm_eo;
 249            continue;
 250          } else mm->rm_eo = mm->rm_so;
 251        } else {
 252          if (!rc) break;
 253          mm->rm_eo = ulen-(start-line);
 254        }
 255        mm->rm_so = 0;
 256      } else if (rc) break;
 257
 258      // At least one line we didn't print since match while -ABC active
 259      if (bars) {
 260        xputs(bars);
 261        bars = 0;
 262      }
 263      matched++;
 264      TT.found = 1;
 265      if (FLAG(q)) {
 266        toys.exitval = 0;
 267        xexit();
 268      }
 269      if (FLAG(l)) {
 270        xprintf("%s%c", name, TT.outdelim);
 271        free(line);
 272        fclose(file);
 273        return;
 274      }
 275
 276      if (!FLAG(c)) {
 277        long bcount = 1 + offset + (start-line) + (FLAG(o) ? mm->rm_so : 0);
 278
 279        if (bin) printf("Binary file %s matches\n", name);
 280        else if (FLAG(o))
 281          outline(start+mm->rm_so, ':', name, lcount, bcount,
 282                  mm->rm_eo-mm->rm_so);
 283        else {
 284          while (dlb) {
 285            struct double_list *dl = dlist_pop(&dlb);
 286            unsigned *uu = (void *)(dl->data+(strlen(dl->data)|3)+1);
 287
 288            outline(dl->data, '-', name, lcount-before, uu[0]+1, uu[1]);
 289            free(dl->data);
 290            free(dl);
 291            before--;
 292          }
 293
 294          if (matched==1)
 295            outline(FLAG(color) ? 0 : line, ':', name, lcount, bcount, ulen);
 296          if (FLAG(color)) {
 297            xputsn(TT.grey);
 298            if (mm->rm_so) xputsl(line, mm->rm_so);
 299            xputsn(TT.red);
 300            xputsl(line+mm->rm_so, mm->rm_eo-mm->rm_so);
 301          }
 302
 303          if (TT.A) after = TT.A+1;
 304        }
 305      }
 306
 307      start += mm->rm_eo;
 308      if (mm->rm_so == mm->rm_eo) break;
 309      if (!FLAG(o) && FLAG(color)) break;
 310    } while (*start);
 311    offset += len;
 312
 313    if (matched) {
 314      // Finish off pending line color fragment.
 315      if (FLAG(color) && !FLAG(o)) {
 316        xputsn(TT.grey);
 317        if (ulen > start-line) xputsl(start, ulen-(start-line));
 318        xputc(TT.outdelim);
 319      }
 320      mcount++;
 321    } else {
 322      int discard = (after || TT.B);
 323
 324      if (after && --after) {
 325        outline(line, '-', name, lcount, 0, ulen);
 326        discard = 0;
 327      }
 328      if (discard && TT.B) {
 329        unsigned *uu, ul = (ulen|3)+1;
 330
 331        line = xrealloc(line, ul+8);
 332        uu = (void *)(line+ul);
 333        uu[0] = offset-len;
 334        uu[1] = ulen;
 335        dlist_add(&dlb, line);
 336        line = 0;
 337        if (++before>TT.B) {
 338          struct double_list *dl;
 339
 340          dl = dlist_pop(&dlb);
 341          free(dl->data);
 342          free(dl);
 343          before--;
 344        } else discard = 0;
 345      }
 346      // If we discarded a line while displaying context, show bars before next
 347      // line (but don't show them now in case that was last match in file)
 348      if (discard && mcount) bars = "--";
 349    }
 350    free(line);
 351
 352    if (FLAG(m) && mcount >= TT.m) break;
 353  }
 354
 355  if (FLAG(c)) outline(0, ':', name, mcount, 0, 1);
 356
 357  // loopfiles will also close the fd, but this frees an (opaque) struct.
 358  fclose(file);
 359  while (dlb) {
 360    struct double_list *dl = dlist_pop(&dlb);
 361
 362    free(dl->data);
 363    free(dl);
 364  }
 365}
 366
 367static void parse_regex(void)
 368{
 369  struct arg_list *al, *new, *list = NULL;
 370  char *s, *ss;
 371
 372  // Add all -f lines to -e list. (Yes, this is leaking allocation context for
 373  // exit to free. Not supporting nofork for this command any time soon.)
 374  al = TT.f ? TT.f : TT.e;
 375  while (al) {
 376    if (TT.f) s = ss = xreadfile(al->arg, 0, 0);
 377    else s = ss = al->arg;
 378
 379    // Split lines at \n, add individual lines to new list.
 380    do {
 381// TODO: NUL terminated input shouldn't split -e at \n
 382      ss = strchr(s, '\n');
 383      if (ss) *(ss++) = 0;
 384      new = xmalloc(sizeof(struct arg_list));
 385      new->next = list;
 386      new->arg = s;
 387      list = new;
 388      s = ss;
 389    } while (ss && *s);
 390
 391    // Advance, when we run out of -f switch to -e.
 392    al = al->next;
 393    if (!al && TT.f) {
 394      TT.f = 0;
 395      al = TT.e;
 396    }
 397  }
 398  TT.e = list;
 399
 400  if (!FLAG(F)) {
 401    // Convert regex list
 402    for (al = TT.e; al; al = al->next) {
 403      struct reg *shoe;
 404
 405      if (FLAG(o) && !*al->arg) continue;
 406      dlist_add_nomalloc(&TT.reg, (void *)(shoe = xmalloc(sizeof(struct reg))));
 407      xregcomp(&shoe->r, al->arg,
 408               (REG_EXTENDED*!!FLAG(E))|(REG_ICASE*!!FLAG(i)));
 409    }
 410    dlist_terminate(TT.reg);
 411  }
 412}
 413
 414static int do_grep_r(struct dirtree *new)
 415{
 416  struct arg_list *al;
 417  char *name;
 418
 419  if (!new->parent) TT.tried++;
 420  if (!dirtree_notdotdot(new)) return 0;
 421  if (S_ISDIR(new->st.st_mode)) {
 422    for (al = TT.exclude_dir; al; al = al->next)
 423      if (!fnmatch(al->arg, new->name, 0)) return 0;
 424    return DIRTREE_RECURSE|(FLAG(R)?DIRTREE_SYMFOLLOW:0);
 425  }
 426  if (TT.S || TT.M) {
 427    for (al = TT.S; al; al = al->next)
 428      if (!fnmatch(al->arg, new->name, 0)) return 0;
 429
 430    if (TT.M) {
 431      for (al = TT.M; al; al = al->next)
 432        if (!fnmatch(al->arg, new->name, 0)) break;
 433
 434      if (!al) return 0;
 435    }
 436  }
 437
 438  // "grep -r onefile" doesn't show filenames, but "grep -r onedir" should.
 439  if (new->parent && !FLAG(h)) toys.optflags |= FLAG_H;
 440
 441  name = dirtree_path(new, 0);
 442  do_grep(openat(dirtree_parentfd(new), new->name, 0), name);
 443  free(name);
 444
 445  return 0;
 446}
 447
 448void grep_main(void)
 449{
 450  char **ss = toys.optargs;
 451
 452  if (FLAG(color) && (!TT.color || !strcmp(TT.color, "auto")) && !isatty(1))
 453    toys.optflags &= ~FLAG_color;
 454
 455  if (FLAG(color)) {
 456    TT.purple = "\033[35m";
 457    TT.cyan = "\033[36m";
 458    TT.red = "\033[1;31m";
 459    TT.green = "\033[32m";
 460    TT.grey = "\033[0m";
 461  } else TT.purple = TT.cyan = TT.red = TT.green = TT.grey = "";
 462
 463  if (FLAG(R)) toys.optflags |= FLAG_r;
 464
 465  // Grep exits with 2 for errors
 466  toys.exitval = 2;
 467
 468  if (!TT.A) TT.A = TT.C;
 469  if (!TT.B) TT.B = TT.C;
 470
 471  TT.indelim = '\n' * !FLAG(z);
 472  TT.outdelim = '\n' * !FLAG(Z);
 473
 474  // Handle egrep and fgrep
 475  if (*toys.which->name == 'e') toys.optflags |= FLAG_E;
 476  if (*toys.which->name == 'f') toys.optflags |= FLAG_F;
 477
 478  if (!TT.e && !TT.f) {
 479    if (!*ss) error_exit("no REGEX");
 480    TT.e = xzalloc(sizeof(struct arg_list));
 481    TT.e->arg = *(ss++);
 482    toys.optc--;
 483  }
 484
 485  parse_regex();
 486
 487  if (!FLAG(h) && toys.optc>1) toys.optflags |= FLAG_H;
 488
 489  if (FLAG(s)) {
 490    close(2);
 491    xopen_stdio("/dev/null", O_RDWR);
 492  }
 493
 494  if (FLAG(r)) {
 495    // Iterate through -r arguments. Use "." as default if none provided.
 496    for (ss = *ss ? ss : (char *[]){".", 0}; *ss; ss++) {
 497      if (!strcmp(*ss, "-")) do_grep(0, *ss);
 498      else dirtree_read(*ss, do_grep_r);
 499    }
 500  } else loopfiles_rw(ss, O_RDONLY|WARN_ONLY, 0, do_grep);
 501  if (TT.tried >= toys.optc || (FLAG(q)&&TT.found)) toys.exitval = !TT.found;
 502}
 503