toybox/toys/pending/diff.c
<<
>>
Prefs
   1/* diff.c - compare files line by line
   2 *
   3 * Copyright 2014 Sandeep Sharma <sandeep.jack2756@gmail.com>
   4 * Copyright 2014 Ashwini Kumar <ak.ashwini1981@gmail.com>
   5 *
   6 * See https://pubs.opengroup.org/onlinepubs/9699919799/utilities/diff.html
   7 * and https://www.cs.dartmouth.edu/~doug/diff.pdf
   8 *
   9 * Deviations from posix: always does -u
  10
  11USE_DIFF(NEWTOY(diff, "<2>2(unchanged-line-format):;(old-line-format):;(new-line-format):;(color)(strip-trailing-cr)B(ignore-blank-lines)d(minimal)b(ignore-space-change)ut(expand-tabs)w(ignore-all-space)i(ignore-case)T(initial-tab)s(report-identical-files)q(brief)a(text)S(starting-file):L(label)*N(new-file)r(recursive)U(unified)#<0=3", TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_ARGFAIL(2)))
  12
  13config DIFF
  14  bool "diff"
  15  default n
  16  help
  17  usage: diff [-abBdiNqrTstw] [-L LABEL] [-S FILE] [-U LINES] FILE1 FILE2
  18
  19  -a    Treat all files as text
  20  -b    Ignore changes in the amount of whitespace
  21  -B    Ignore changes whose lines are all blank
  22  -d    Try hard to find a smaller set of changes
  23  -i    Ignore case differences
  24  -L    Use LABEL instead of the filename in the unified header
  25  -N    Treat absent files as empty
  26  -q    Output only whether files differ
  27  -r    Recurse
  28  -S    Start with FILE when comparing directories
  29  -s    Report when two files are the same
  30  -T    Make tabs line up by prefixing a tab when necessary
  31  -t    Expand tabs to spaces in output
  32  -u    Unified diff
  33  -U    Output LINES lines of context
  34  -w    Ignore all whitespace
  35
  36  --color     Color output   --strip-trailing-cr   Strip '\r' from input lines
  37  --TYPE-line-format=FORMAT  Display TYPE (unchanged/old/new) lines using FORMAT
  38    FORMAT uses printf integer escapes (ala %-2.4x) followed by LETTER: FELMNn
  39  Supported format specifiers are:
  40  * %l, the contents of the line, without the trailing newline
  41  * %L, the contents of the line, including the trailing newline
  42  * %%, the character '%'
  43*/
  44
  45#define FOR_diff
  46#include "toys.h"
  47
  48GLOBALS(
  49  long U;
  50  struct arg_list *L;
  51  char *S, *new_line_format, *old_line_format, *unchanged_line_format;
  52
  53  int dir_num, size, is_binary, differ, change, len[2], *offset[2];
  54  struct stat st[2];
  55  struct {
  56    char **list;
  57    int nr_elm;
  58  } dir[2];
  59  struct {
  60    FILE *fp;
  61    int len;
  62  } file[2];
  63)
  64
  65#define IS_STDIN(s)     (*(s)=='-' && !(s)[1])
  66
  67struct v_vector {
  68  unsigned serial:31;
  69  unsigned last:1;
  70  union {
  71    unsigned hash;
  72    unsigned p;
  73  };
  74};
  75
  76struct diff {
  77  long a, b, c, d, prev, suff;
  78};
  79
  80struct candidate {
  81  struct candidate *next, *prev;
  82  int a, b;
  83};
  84
  85enum {
  86  empty = 1 << 9,
  87  eol = 1 << 10,
  88  eof = 1 << 11,
  89  space = 1 << 12
  90};
  91
  92static int comp(void *a, void *b)
  93{
  94  int i = ((struct v_vector *)a)->hash - ((struct v_vector *)b)->hash;
  95
  96  return i ? : ((struct v_vector *)a)->serial - ((struct v_vector *)b)->serial;
  97}
  98
  99static int search(struct candidate **K, int r, int k, int j)
 100{
 101  int low = r, upper = k, mid;
 102
 103  while (low<=(mid = (low+upper)/2)) {
 104    if (K[mid]->b < j && K[mid + 1]->b > j) return mid;
 105    if (K[mid]->b < j) low = mid + 1;
 106    else if (K[mid]->b > j) upper = mid - 1;
 107    else return -1;
 108  }
 109  return -1;
 110}
 111
 112static struct candidate *new_candidate(int i, int j, struct candidate *prev)
 113{
 114  struct candidate *c = xzalloc(sizeof(struct candidate));
 115
 116  c->a = i;
 117  c->b = j;
 118  c->prev = prev;
 119  return c;
 120}
 121
 122/* 1. Search K[r: k] for an element K[s] such that K[s]-> b < j and K[s + 1]->b > j
 123 * 2. if found do
 124 *  2.a. If K[s + 1]->b > j do K[r] = c; r = s+1 and c = candidate(i, j, K[s]) //we have a candidate
 125 *  2.b. if s = k (fence reached move it further) do K[k + 2] = K[k + 1], k++
 126 * 3. if E[p].last true break i.e we have reached at the end of an equiv class
 127 *    else p = p + 1 //keep traversing the equiv class.
 128 * 4. K[r] = c //Save the sucessfully filled k-candidate.
 129 */
 130static void do_merge(struct candidate **K, int *k, int i,
 131    struct v_vector *E, int p)
 132{
 133  int r = 0, s, j;
 134  struct candidate *pr = 0, *c = K[0];
 135
 136  for (;;) {
 137    j = E[p].serial;
 138    s = search(K, r, *k, j);
 139    if (s>=0 && K[s]->b<j && K[s+1]->b>j) {
 140      if (K[s+1]->b>j) {
 141        pr = K[s];
 142        if (r && K[r]) c->next = K[r];
 143        K[r] = c;
 144        r = s+1;
 145        c = new_candidate(i , j, pr);
 146      }
 147      if (s == *k) {
 148        ++*k;
 149        K[*k+1] = K[*k];
 150        break;
 151      }
 152    }
 153    if (E[p].last) break;
 154    else p++;
 155  }
 156  K[r] = c;
 157}
 158
 159static int read_tok(FILE *fp, off_t *off, int tok)
 160{
 161  int t = 0, is_space;
 162
 163  tok |= empty;
 164  while (!(tok & eol)) {
 165    t = fgetc(fp);
 166
 167    if (FLAG(strip_trailing_cr) && t == '\r') {
 168      int t2 = fgetc(fp);
 169      if (t2 == '\n') {
 170        t = t2;
 171        if (off) (*off)++;
 172      } else {
 173        ungetc(t2, fp);
 174      }
 175    }
 176
 177    if (off && t != EOF) *off += 1;
 178    is_space = isspace(t) || (t == EOF);
 179    tok |= (t & (eof + eol)); //set tok eof+eol when t is eof
 180
 181    if (t == '\n') tok |= eol;
 182    if (FLAG(i)) if (t >= 'A' && t <= 'Z') t = tolower(t);
 183
 184    if (FLAG(w) && is_space) continue;
 185
 186    if (FLAG(b)) {
 187      if (tok & space) {
 188        if (is_space) continue;
 189        tok &= ~space;
 190      } else if (is_space) t = space + ' ';
 191    }
 192    tok &= ~(empty + 0xff);  //remove empty and char too.
 193    tok |= t; //add most recent char
 194    break;
 195  }
 196
 197  return tok;
 198}
 199
 200int bcomp(void *a, void *b)
 201{
 202  struct v_vector *l = (struct v_vector *)a, *r = (struct v_vector *)b;
 203
 204  return (l->hash-r->hash) ? : r[-1].last ? 0 : -1;
 205}
 206
 207/*  file[0] corresponds file 1 and file[1] correspond file 2.
 208 * 1. calc hashes for both the files and store them in vector(v[0], v[1])
 209 * 2. sort file[1] with hash as primary and serial as sec. key
 210 * 3. Form the equivalance class of file[1] stored in e vector. It lists all the equivalence
 211 *    classes of lines in file[1], with e.last = true on the last element of each class.
 212 *    The elements are ordered by serial within classes.
 213 * 4. Form the p vector stored in  p_vector. p_vector[i], if non-zero, now points in e vector
 214 *    to the beginning of the equiv class of lines in file[1] equivalent to line
 215 *    i in file[0].
 216 * 5. Form the k-candidates as discribed in do_merge.
 217 * 6. Create a vector J[i] = j, such that i'th line in file[0] is j'th line of
 218 *    file[1], i.e J comprises LCS
 219 */
 220static int *create_j_vector()
 221{
 222  int tok, i, j, size = 100, k;
 223  off_t off;
 224  long hash;
 225  int *p_vector, *J;
 226  struct v_vector *v[2], *e;
 227  struct candidate **kcand, *pr;
 228
 229  for (i = 0; i < 2; i++) {
 230    tok = off = 0;
 231    hash = 5831;
 232    v[i] = xzalloc(size * sizeof(struct v_vector));
 233    TT.offset[i] = xzalloc(size * sizeof(int));
 234    TT.file[i].len = 0;
 235    if (fseek(TT.file[i].fp, 0, SEEK_SET)) perror_exit("fseek failed");
 236
 237    while (1) {
 238      tok  = read_tok(TT.file[i].fp, &off, tok);
 239      if (!(tok & empty)) {
 240        hash = ((hash << 5) + hash) + (tok & 0xff);
 241        continue;
 242      }
 243
 244      if (size == ++TT.file[i].len) {
 245        size = size * 11 / 10;
 246        v[i] = xrealloc(v[i], size*sizeof(struct v_vector));
 247        TT.offset[i] = xrealloc(TT.offset[i], size*sizeof(int));
 248      }
 249
 250      v[i][TT.file[i].len].hash = hash & INT_MAX;
 251      TT.offset[i][TT.file[i].len] = off;
 252      if ((tok & eof)) {
 253        TT.offset[i][TT.file[i].len] = ++off;
 254        break;
 255      }
 256      hash = 5831;  //next line
 257      tok = 0;
 258    }
 259    if (TT.offset[i][TT.file[i].len]-TT.offset[i][TT.file[i].len-1] == 1)
 260      TT.file[i].len--;
 261  }
 262
 263  for (i = 0; i<=TT.file[1].len; i++) v[1][i].serial = i;
 264  qsort(v[1]+1, TT.file[1].len, sizeof(struct v_vector), (void *)comp);
 265
 266  e = v[1];
 267  e[0].serial = 0;
 268  e[0].last = 1;
 269  for (i = 1; i<=TT.file[1].len; i++)
 270    e[i].last = i==TT.file[1].len || v[1][i].hash!=v[1][i+1].hash;
 271
 272  p_vector = xzalloc((TT.file[0].len+2)*sizeof(int));
 273  for (i = 1; i<=TT.file[0].len; i++) {
 274    void *r = bsearch(&v[0][i], e+1, TT.file[1].len, sizeof(*e), (void *)bcomp);
 275    if (r) p_vector[i] = (struct v_vector *)r - e;
 276  }
 277
 278  for (i = 1; i<=TT.file[0].len; i++) e[i].p = p_vector[i];
 279  free(p_vector);
 280
 281  size = 100;
 282  kcand = xzalloc(size * sizeof(struct candidate *));
 283
 284  kcand[0] = new_candidate(0 , 0, 0);
 285  kcand[1] = new_candidate(TT.file[0].len+1, TT.file[1].len+1, 0); //the fence
 286
 287  k = 0;  //last successfully filled k candidate.
 288  for (i = 1; i<=TT.file[0].len; i++) {
 289    if (!e[i].p) continue;
 290    if ((size - 2) == k) {
 291      size = size * 11 / 10;
 292      kcand = xrealloc(kcand, (size*sizeof(struct candidate *)));
 293    }
 294    do_merge(kcand, &k, i, e, e[i].p);
 295  }
 296  free(v[0]); //no need for v_vector now.
 297  free(v[1]);
 298
 299  J = xzalloc((TT.file[0].len+2)*sizeof(int));
 300
 301  for (pr = kcand[k]; pr; pr = pr->prev) J[pr->a] = pr->b;
 302  J[TT.file[0].len+1] = TT.file[1].len+1; //mark boundary
 303
 304  for (i = k+1; i>=0; i--) llist_traverse(kcand[i], free);
 305  free(kcand);
 306
 307  for (i = 1; i<=TT.file[0].len; i++) { // jackpot?
 308    if (!J[i]) continue;
 309
 310    if (fseek(TT.file[0].fp, TT.offset[0][i-1], SEEK_SET)
 311     || fseek(TT.file[1].fp, TT.offset[1][J[i]-1], SEEK_SET))
 312       perror_exit("fseek");
 313
 314    for (j = J[i]; i<=TT.file[0].len && J[i]==j; i++, j++) {
 315      int tok0 = 0, tok1 = 0;
 316
 317      do {
 318        tok0 = read_tok(TT.file[0].fp, NULL, tok0);
 319        tok1 = read_tok(TT.file[1].fp, NULL, tok1);
 320        if (((tok0 ^ tok1) & empty) || ((tok0 & 0xff) != (tok1 & 0xff)))
 321          J[i] = 0;
 322      } while (!(tok0 & tok1 & empty));
 323    }
 324  }
 325  return J;
 326}
 327
 328static int *diff(char **files)
 329{
 330  size_t i ,j;
 331  int s, t;
 332  char *bufi, *bufj;
 333
 334  TT.is_binary = 0; //loop calls to diff
 335  TT.differ = 0;
 336
 337  for (i = 0; i < 2; i++) {
 338    if ((j = !strcmp(files[i], "-")) || S_ISFIFO(TT.st[i].st_mode)) {
 339      char *tmp_name;
 340      int srcfd = j ? 0 : open(files[i], O_RDONLY),
 341        tmpfd = xtempfile("fifo", &tmp_name);
 342
 343      unlink(tmp_name);
 344      free(tmp_name);
 345
 346      xsendfile(srcfd, tmpfd);
 347      if (!j) close(srcfd);
 348      TT.file[i].fp = fdopen(tmpfd, "r");
 349    } else TT.file[i].fp = fopen(files[i], "r");
 350
 351    if (!TT.file[i].fp) {
 352      perror_msg("%s", files[i]);
 353      TT.differ = 2;
 354      return 0; //return SAME
 355    }
 356  }
 357
 358  s = sizeof(toybuf)/2;
 359  bufi = toybuf;
 360  bufj = toybuf+s;
 361
 362  if (fseek(TT.file[0].fp, 0, SEEK_SET) || fseek(TT.file[1].fp, 0, SEEK_SET))
 363    perror_exit("fseek");
 364
 365  if (FLAG(a)) return create_j_vector();
 366
 367  while (1) {
 368    i = fread(bufi, 1, s, TT.file[0].fp);
 369    j = fread(bufj, 1, s, TT.file[1].fp);
 370
 371    if (i != j) TT.differ = 1;
 372
 373    for (t = 0; t < i && !TT.is_binary; t++) if (!bufi[t]) TT.is_binary = 1;
 374    for (t = 0; t < j && !TT.is_binary; t++) if (!bufj[t]) TT.is_binary = 1;
 375
 376    i = minof(i, j);
 377    for (t = 0; t < i; t++) if (bufi[t] != bufj[t]) TT.differ = 1;
 378
 379    if (!i || !j) break;
 380  }
 381  if (TT.is_binary || !TT.differ) return 0;
 382
 383  return create_j_vector();
 384}
 385
 386static void print_diff(int a, int b, char c, int *off_set, FILE *fp)
 387{
 388  int i, j, cc, cl;
 389  char *reset = 0, *fmt = 0;
 390
 391  if (!TT.new_line_format && c!=' ' && FLAG(color)) {
 392    printf("\e[%dm", 31+(c=='+'));
 393    reset = "\e[0m";
 394  }
 395
 396  for (i = a; i <= b; i++) {
 397    if (fseek(fp, off_set[i - 1], SEEK_SET)) perror_exit("fseek failed");
 398    if (TT.new_line_format) {
 399      if (c == '+') fmt = TT.new_line_format;
 400      else if (c == '-') fmt = TT.old_line_format;
 401      else fmt = TT.unchanged_line_format;
 402      while (*fmt) {
 403        if (*fmt == '%') {
 404          fmt++;
 405          char f = *fmt++;
 406          if (f == '%') putchar('%');
 407          else if (f == 'l' || f == 'L') {
 408            for (j = 0; j <  (off_set[i] - off_set[i - 1]); j++) {
 409              cc = fgetc(fp);
 410              if (cc == EOF) break;
 411              if (cc != '\n' || f == 'L') putchar(cc);
 412            }
 413          } else error_exit("Unrecognized format specifier %%%c", f);
 414        } else putchar(*fmt++);
 415      }
 416      continue;
 417    }
 418    putchar(c);
 419    if (FLAG(T)) putchar('\t');
 420    for (j = 0, cl = 0; j <  (off_set[i] - off_set[i - 1]); j++) {
 421      cc = fgetc(fp);
 422      if (cc == EOF) {
 423        printf("%s\n\\ No newline at end of file\n", reset ? : "");
 424        return;
 425      }
 426      if ((cc == '\t') && FLAG(t)) do putchar(' '); while (++cl & 7);
 427      else {
 428        putchar(cc); //xputc has calls to fflush, it hurts performance badly.
 429        cl++;
 430      }
 431    }
 432  }
 433  if (reset) xputsn(reset);
 434}
 435
 436static char *concat_file_path(char *path, char *default_path)
 437{
 438  char *final_path;
 439
 440  if ('/' == path[strlen(path) - 1]) {
 441    while (*default_path == '/') ++default_path;
 442    final_path = xmprintf("%s%s", path, default_path);
 443  }
 444  else if (*default_path != '/')
 445    final_path = xmprintf("%s/%s", path, default_path);
 446  else final_path = xmprintf("%s%s", path, default_path);
 447  return final_path;
 448}
 449
 450static int skip(struct dirtree *node)
 451{
 452  int len = strlen(toys.optargs[TT.dir_num]), ret = 0;
 453  char *tmp = NULL, *ptr, *f_path = dirtree_path(node, NULL);
 454  struct stat st;
 455
 456  ptr = f_path;
 457  ptr += len;
 458  if (ptr[0]) {
 459    tmp = concat_file_path(toys.optargs[1 - TT.dir_num], ptr);
 460    if (tmp && !stat(tmp, &st)) ret = 0; //it is there on other side
 461    else ret = 1; //not present on other side.
 462  }
 463  free(f_path);
 464  if (tmp) free(tmp);
 465  return ret; //add otherwise
 466}
 467
 468static void add_to_list(struct dirtree *node)
 469{
 470  char *full_path;
 471
 472  TT.dir[TT.dir_num].list = xrealloc(TT.dir[TT.dir_num].list,
 473      (TT.size + 1)*sizeof(char*));
 474  TT.size++;
 475  full_path = dirtree_path(node, NULL);
 476  TT.dir[TT.dir_num].list[TT.size - 1] = full_path;
 477}
 478
 479static int list_dir(struct dirtree *node)
 480{
 481  int ret = 0;
 482
 483  if (!dirtree_notdotdot(node)) return 0;
 484
 485  if (S_ISDIR(node->st.st_mode) && !node->parent) { //add root dirs.
 486    add_to_list(node);
 487    return (DIRTREE_RECURSE|DIRTREE_SYMFOLLOW);
 488  }
 489
 490  if (S_ISDIR(node->st.st_mode) && FLAG(r)) {
 491    if (!FLAG(N)) ret = skip(node);
 492    if (!ret) return DIRTREE_RECURSE|DIRTREE_SYMFOLLOW;
 493    else {
 494      add_to_list(node); //only at one side.
 495      return 0;
 496    }
 497  } else {
 498    add_to_list(node);
 499    return S_ISDIR(node->st.st_mode) ? 0 : (DIRTREE_RECURSE|DIRTREE_SYMFOLLOW);
 500  }
 501}
 502
 503static int cmp(void *p1, void *p2)
 504{
 505   return strcmp(*(char **)p1, *(char **)p2);
 506}
 507
 508// quote and escape filenames that have awkward characters
 509char *quote_filename(char *filename)
 510{
 511  char *to = "abfnrtv\"\\", *from = "\a\b\f\n\r\t\v\"\\", *s, *t=0, *u;
 512  int len, quote = 0;
 513
 514  for (;;) {
 515    // measure escapes on first pass, write on second
 516    len = 0;
 517    for (s = filename; *s; s++) {
 518      if ((u = strchr(from, *s))) {
 519        if (t) t[len] = '\\', t[len+1] = to[u-from];
 520        len += 2;
 521      } else if (*s<0x20 || *s>=0x80)
 522        len += snprintf(t+len, 5*!!t, "\\%.3o", *s);
 523      else {
 524        if (t) t[len] = *s;
 525        len++;
 526      }
 527    }
 528    if (t) {
 529      if (quote) t[len++] = '"';
 530      t[len] = 0;
 531
 532      return t-quote;
 533    }
 534
 535    // construct the new string
 536    quote = strlen(filename)!=len || strchr(filename, ' ');
 537    t = xmalloc(len+1+2*quote);
 538    if (quote) *t++ = '"';
 539  }
 540}
 541
 542static void show_label(char *prefix, char *filename, struct stat *sb)
 543{
 544  char date[36];
 545  char *quoted_file;
 546
 547  quoted_file = quote_filename(filename);
 548  printf("%s %s\t%s\n", prefix, quoted_file,
 549    format_iso_time(date, sizeof(date), &sb->st_mtim));
 550  free(quoted_file);
 551}
 552
 553static void do_diff(char **files)
 554{
 555  long i = 1, size = 1, x = 0, change = 0, ignore_white,
 556   start1, end1, start2, end2;
 557  struct diff *d;
 558  struct arg_list *llist = TT.L;
 559  int *J;
 560  
 561  TT.offset[0] = TT.offset[1] = NULL;
 562  J = diff(files);
 563
 564  if (!J) return; //No need to compare, have to status only
 565
 566  d = xzalloc(size *sizeof(struct diff));
 567  do {
 568    ignore_white = 0;
 569    for (d[x].a = i; d[x].a<=TT.file[0].len; d[x].a++) {
 570      if (J[d[x].a] != (J[d[x].a - 1] + 1)) break;
 571      else continue;
 572    }
 573    d[x].c = (J[d[x].a - 1] + 1);
 574
 575    for (d[x].b = (d[x].a - 1); d[x].b<=TT.file[0].len; d[x].b++) {
 576      if (J[d[x].b + 1]) break;
 577      else continue;
 578    }
 579    d[x].d = (J[d[x].b + 1] - 1);
 580
 581    if (FLAG(B)) {
 582      if (d[x].a <= d[x].b) {
 583        if ((TT.offset[0][d[x].b] - TT.offset[0][d[x].a - 1])
 584            == (d[x].b - d[x].a + 1))
 585          ignore_white = 1;
 586      } else if (d[x].c <= d[x].d){
 587        if ((TT.offset[1][d[x].d] - TT.offset[1][d[x].c - 1])
 588            == (d[x].d - d[x].c + 1))
 589          ignore_white = 1;
 590      }
 591    }
 592
 593    //is we have diff ?   TODO: lolcat?
 594    if ((d[x].a <= d[x].b || d[x].c <= d[x].d) && !ignore_white) change = 1;
 595
 596    if (!ignore_white) d = xrealloc(d, (x + 2) *sizeof(struct diff));
 597    i = d[x].b + 1;
 598    if (i>TT.file[0].len) break;
 599    J[d[x].b] = d[x].d;
 600    if (!ignore_white) x++;
 601  } while (i<=TT.file[0].len);
 602
 603  i = x+1;
 604  TT.differ = change; //update status, may change bcoz of -w etc.
 605
 606  if (!FLAG(q) && change) {
 607    if (!TT.new_line_format) {
 608      if (FLAG(color)) printf("\e[1m");
 609      if (FLAG(L)) printf("--- %s\n", llist->arg);
 610      else show_label("---", files[0], &(TT).st[0]);
 611      if (!FLAG(L) || !llist->next) show_label("+++", files[1], &(TT).st[1]);
 612      else {
 613        while (llist->next) llist = llist->next;
 614        printf("+++ %s\n", llist->arg);
 615      }
 616      if (FLAG(color)) printf("\e[0m");
 617    }
 618
 619    struct diff *t, *ptr1 = d, *ptr2 = d;
 620    while (i) {
 621      long a,b;
 622
 623      // trim context to file len.
 624      if (TT.new_line_format || TT.U>TT.file[0].len) TT.U = TT.file[0].len;
 625      if (ptr1->b < ptr1->a && ptr1->d < ptr1->c) {
 626        i--;
 627        continue;
 628      }
 629      //Handle the context stuff
 630      a =  ptr1->a;
 631      b = minof(TT.file[0].len, ptr1->b);
 632      if (i == x + 1) ptr1->suff = maxof(1, a-TT.U);
 633      else if (ptr1[-1].prev >= ptr1->a-TT.U) ptr1->suff = ptr1[-1].prev+1;
 634      else ptr1->suff =  ptr1->a-TT.U;
 635calc_ct:
 636      if (i > 1) {
 637        if ((ptr2->b + TT.U) >= (ptr2  + 1)->a) {
 638          ptr2++;
 639          i--;
 640          goto calc_ct;
 641        } else ptr2->prev = ptr2->b + TT.U;
 642      } else ptr2->prev = ptr2->b;
 643      start1 = (ptr2->prev - ptr1->suff + 1);
 644      end1 = (start1 == 1) ? -1 : start1;
 645      start2 = maxof(1, ptr1->c - (ptr1->a - ptr1->suff));
 646      end2 = ptr2->prev - ptr2->b + ptr2->d;
 647
 648      if (!TT.new_line_format) {
 649        if (FLAG(color)) printf("\e[36m");
 650        printf("@@ -%ld", start1 ? ptr1->suff: (ptr1->suff -1));
 651        if (end1 != -1) printf(",%ld ", ptr2->prev-ptr1->suff + 1);
 652        else putchar(' ');
 653
 654        printf("+%ld", (end2 - start2 + 1) ? start2: (start2 -1));
 655        if ((end2 - start2 +1) != 1) printf(",%ld ", (end2 - start2 +1));
 656        else putchar(' ');
 657        printf("@@");
 658        if (FLAG(color)) printf("\e[0m");
 659        putchar('\n');
 660      }
 661
 662      for (t = ptr1; t <= ptr2; t++) {
 663        if (t==ptr1) print_diff(t->suff, t->a-1, ' ', TT.offset[0], TT.file[0].fp);
 664        print_diff(t->a, t->b, '-', TT.offset[0], TT.file[0].fp);
 665        print_diff(t->c, t->d, '+', TT.offset[1], TT.file[1].fp);
 666        if (t == ptr2)
 667          print_diff(t->b+1, (t)->prev, ' ', TT.offset[0], TT.file[0].fp);
 668        else print_diff(t->b+1, (t+1)->a-1, ' ', TT.offset[0], TT.file[0].fp);
 669      }
 670      ptr2++;
 671      ptr1 = ptr2;
 672      i--;
 673    } //end of while
 674  } //End of !FLAG_q
 675  free(d);
 676  free(J);
 677  free(TT.offset[0]);
 678  free(TT.offset[1]);
 679}
 680
 681static void show_status(char **files)
 682{
 683  if (TT.differ==2) return; // TODO: needed?
 684  if (TT.differ ? FLAG(q) || TT.is_binary : FLAG(s))
 685    printf("Files %s and %s %s\n", files[0], files[1],
 686      TT.differ ? "differ" : "are identical");
 687}
 688
 689static void create_empty_entry(int l , int r, int j)
 690{
 691  struct stat st[2];
 692  char *f[2], *path[2];
 693  int i;
 694
 695  for (i = 0; i < 2; i++) {
 696    if (j) {
 697      if (!FLAG(N) || i!=(j>0)) continue;
 698      path[!i] = concat_file_path(TT.dir[!i].list[0],
 699        TT.dir[i].list[i ? r : l]+TT.len[i]);
 700      f[!i] = "/dev/null";
 701    }
 702    path[i] = f[i] = TT.dir[i].list[i ? r : l];
 703    stat(f[i], st+i);
 704    if (j) st[!i] = st[i];
 705  }
 706
 707  for (i = 0; i<2; i++) {
 708    if (!S_ISREG(st[i].st_mode) && !S_ISDIR(st[i].st_mode)) {
 709      printf("File %s is not a regular file or directory and was skipped\n",
 710        path[i]);
 711      break;
 712    }
 713  }
 714
 715  if (i != 2);
 716  else if (S_ISDIR(st[0].st_mode) && S_ISDIR(st[1].st_mode))
 717    printf("Common subdirectories: %s and %s\n", path[0], path[1]);
 718  else if ((i = S_ISDIR(st[0].st_mode)) != S_ISDIR(st[1].st_mode)) {
 719    char *fidir[] = {"directory", "regular file"};
 720    printf("File %s is a %s while file %s is a %s\n",
 721      path[0], fidir[!i], path[1], fidir[i]);
 722  } else {
 723    do_diff(f);
 724    show_status(path);
 725    if (TT.file[0].fp) fclose(TT.file[0].fp);
 726    if (TT.file[1].fp) fclose(TT.file[1].fp);
 727  }
 728
 729  if (FLAG(N) && j) free(path[j<=0]);
 730}
 731
 732static void diff_dir(int *start)
 733{
 734  int l, r, j = 0;
 735
 736  l = start[0]; //left side file start
 737  r = start[1]; //right side file start
 738  while (l < TT.dir[0].nr_elm && r < TT.dir[1].nr_elm) {
 739    if ((j = strcmp (TT.dir[0].list[l]+TT.len[0],
 740            (TT.dir[1].list[r]+TT.len[1]))) && !FLAG(N)) {
 741      if (j > 0) {
 742        printf("Only in %s: %s\n", TT.dir[1].list[0], TT.dir[1].list[r]+TT.len[1]);
 743        free(TT.dir[1].list[r++]);
 744      } else {
 745        printf ("Only in %s: %s\n", TT.dir[0].list[0], TT.dir[0].list[l]+TT.len[0]);
 746        free(TT.dir[0].list[l++]);
 747      }
 748      TT.differ = 1;
 749    } else {
 750      create_empty_entry(l, r, j); //create non empty dirs/files if -N.
 751      if (j>=0) free(TT.dir[1].list[r++]);
 752      if (j<=0) free(TT.dir[0].list[l++]);
 753    }
 754  }
 755
 756  if (l == TT.dir[0].nr_elm) {
 757    while (r<TT.dir[1].nr_elm) {
 758      if (!FLAG(N)) {
 759        printf ("Only in %s: %s\n", TT.dir[1].list[0], TT.dir[1].list[r]+TT.len[1]);
 760        TT.differ = 1;
 761      } else create_empty_entry(l, r, 1);
 762      free(TT.dir[1].list[r++]);
 763    }
 764  } else if (r == TT.dir[1].nr_elm) {
 765    while (l<TT.dir[0].nr_elm) {
 766      if (!FLAG(N)) {
 767        printf ("Only in %s: %s\n", TT.dir[0].list[0], TT.dir[0].list[l]+TT.len[0]);
 768        TT.differ = 1;
 769      } else create_empty_entry(l, r, -1);
 770      free(TT.dir[0].list[l++]);
 771    }
 772  }
 773  free(TT.dir[0].list[0]); //we are done, free root nodes too
 774  free(TT.dir[0].list);
 775  free(TT.dir[1].list[0]);
 776  free(TT.dir[1].list);
 777}
 778
 779void diff_main(void)
 780{
 781  int j = 0, k = 1, start[2] = {1, 1};
 782  char **files = toys.optargs;
 783
 784  toys.exitval = 2;
 785  if (FLAG(color) && !isatty(1)) toys.optflags ^= FLAG_color;
 786
 787  for (j = 0; j < 2; j++) {
 788    if (IS_STDIN(files[j])) fstat(0, &TT.st[j]);
 789    else xstat(files[j], &TT.st[j]);
 790  }
 791
 792  if (S_ISDIR(TT.st[0].st_mode) != S_ISDIR(TT.st[1].st_mode))
 793    error_exit("can't compare directory to non-directory");
 794
 795  if (TT.unchanged_line_format || TT.old_line_format || TT.new_line_format) {
 796    if (S_ISDIR(TT.st[0].st_mode) && S_ISDIR(TT.st[1].st_mode))
 797      error_exit("can't use line format with directories");
 798    if (!TT.unchanged_line_format) TT.unchanged_line_format = "%l\n";
 799    if (!TT.old_line_format) TT.old_line_format = "%l\n";
 800    if (!TT.new_line_format) TT.new_line_format = "%l\n";
 801  }
 802
 803  if (same_file(TT.st, TT.st+1)) {
 804    toys.exitval = 0;
 805    return show_status(files);
 806  }
 807
 808  if (S_ISDIR(TT.st[0].st_mode) && S_ISDIR(TT.st[1].st_mode)) {
 809    for (j = 0; j < 2; j++) {
 810      memset(TT.dir+j, 0, sizeof(*TT.dir));
 811      dirtree_flagread(files[j], DIRTREE_SYMFOLLOW, list_dir);
 812      TT.dir[j].nr_elm = TT.size; //size updated in list_dir
 813      qsort(&TT.dir[j].list[1], TT.size-1, sizeof(char *), (void *)cmp);
 814
 815      TT.len[j] = strlen(TT.dir[j].list[0]); //calc root node len
 816      TT.len[j] += TT.dir[j].list[0][TT.len[j]-1] != '/';
 817
 818      if (FLAG(S)) {
 819        while (k<TT.size && strcmp(TT.dir[j].list[k]+TT.len[j], TT.S)<0) {
 820          start[j]++;
 821          k++;
 822        }
 823      }
 824      TT.dir_num++;
 825      TT.size = 0;
 826      k = 1;
 827    }
 828    diff_dir(start);
 829  } else {
 830    if (S_ISDIR(TT.st[0].st_mode) || S_ISDIR(TT.st[1].st_mode)) {
 831      int d = S_ISDIR(TT.st[0].st_mode);
 832      char *slash = strrchr(files[d], '/');
 833
 834      files[!d] = concat_file_path(files[!d], slash ? slash+1 : files[d]);
 835      if (stat(files[!d], &TT.st[!d])) perror_exit("%s", files[!d]);
 836    }
 837    do_diff(files);
 838    show_status(files);
 839    if (TT.file[0].fp) fclose(TT.file[0].fp);
 840    if (TT.file[1].fp) fclose(TT.file[1].fp);
 841  }
 842  toys.exitval = TT.differ; //exit status will be the status
 843}
 844