toybox/toys/posix/tar.c
<<
>>
Prefs
   1/* tar.c - create/extract archives
   2 *
   3 * Copyright 2014 Ashwini Kumar <ak.ashwini81@gmail.com>
   4 *
   5 * For the command, see
   6 *   http://pubs.opengroup.org/onlinepubs/007908799/xcu/tar.html
   7 * For the modern file format, see
   8 *   http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06
   9 *   https://en.wikipedia.org/wiki/Tar_(computing)#File_format
  10 *   https://www.gnu.org/software/tar/manual/html_node/Tar-Internals.html
  11 *
  12 * For writing to external program
  13 * http://www.gnu.org/software/tar/manual/html_node/Writing-to-an-External-Program.html
  14 *
  15 * Toybox will never implement the "pax" command as a matter of policy.
  16 *
  17 * Why --exclude pattern but no --include? tar cvzf a.tgz dir --include '*.txt'
  18 *
  19
  20USE_TAR(NEWTOY(tar, "&(restrict)(full-time)(no-recursion)(numeric-owner)(no-same-permissions)(overwrite)(exclude)*(mode):(mtime):(group):(owner):(to-command):o(no-same-owner)p(same-permissions)k(keep-old)c(create)|h(dereference)x(extract)|t(list)|v(verbose)I(use-compress-program):J(xz)j(bzip2)z(gzip)S(sparse)O(to-stdout)P(absolute-names)m(touch)X(exclude-from)*T(files-from)*C(directory):f(file):a[!txc][!jzJa]", TOYFLAG_USR|TOYFLAG_BIN))
  21
  22config TAR
  23  bool "tar"
  24  default y
  25  help
  26    usage: tar [-cxt] [-fvohmjkOS] [-XTCf NAME] [FILE...]
  27
  28    Create, extract, or list files in a .tar (or compressed t?z) file.
  29
  30    Options:
  31    c  Create                x  Extract               t  Test (list)
  32    f  tar FILE (default -)  C  Change to DIR first   v  Verbose display
  33    o  Ignore owner          h  Follow symlinks       m  Ignore mtime
  34    J  xz compression        j  bzip2 compression     z  gzip compression
  35    O  Extract to stdout     X  exclude names in FILE T  include names in FILE
  36
  37    --exclude        FILENAME to exclude    --full-time   Show seconds with -tv
  38    --mode MODE      Adjust modes           --mtime TIME  Override timestamps
  39    --owner NAME     Set file owner to NAME --group NAME  Set file group to NAME
  40    --sparse         Record sparse files
  41    --restrict       All archive contents must extract under one subdirectory
  42    --numeric-owner  Save/use/display uid and gid, not user/group name
  43    --no-recursion   Don't store directory contents
  44    -I PROG          Filter through PROG to compress or PROG -d to decompress
  45*/
  46
  47#define FOR_tar
  48#include "toys.h"
  49
  50GLOBALS(
  51  char *f, *C;
  52  struct arg_list *T, *X;
  53  char *I, *to_command, *owner, *group, *mtime, *mode;
  54  struct arg_list *exclude;
  55
  56  struct double_list *incl, *excl, *seen;
  57  struct string_list *dirs;
  58  char *cwd;
  59  int fd, ouid, ggid, hlc, warn, adev, aino, sparselen;
  60  long long *sparse;
  61  time_t mtt;
  62
  63  // hardlinks seen so far (hlc many)
  64  struct {
  65    char *arg;
  66    ino_t ino;
  67    dev_t dev;
  68  } *hlx;
  69
  70  // Parsed information about a tar header.
  71  struct tar_header {
  72    char *name, *link_target, *uname, *gname;
  73    long long size, ssize;
  74    uid_t uid;
  75    gid_t gid;
  76    mode_t mode;
  77    time_t mtime;
  78    dev_t device;
  79  } hdr;
  80)
  81
  // Raw layout of one 512 byte tar header block. Every member is a plain
// character field; the numeric ones are written by itoo() and read by otoi().
struct tar_hdr {
  char name[100];
  char mode[8];
  char uid[8];
  char gid[8];
  char size[12];
  char mtime[12];
  char chksum[8];
  char type;
  char link[100];
  char magic[8];
  char uname[32];
  char gname[32];
  char major[8];
  char minor[8];
  char prefix[155];
  char padd[12];
};
  87
  // Store val in a tar header field of len bytes.
// Values that fit are written as len-1 zero padded octal digits plus a NUL.
// Larger values use base-256: a marker byte of 128 followed by len-1 bytes
// holding val as a big endian binary number (the layout otoi() reads back).
static void itoo(char *str, int len, unsigned long long val)
{
  // Do we need binary encoding?
  if (!(val>>(3*(len-1)))) sprintf(str, "%0*llo", len-1, val);
  else {
    // Work through unsigned char so bytes above 127 are stored portably
    unsigned char *out = (unsigned char *)str+len;

    // Fill from the least significant byte backwards, stopping before byte 0
    for (; --len; val >>= 8) *--out = val&255;
    *--out = 128;
  }
}
// Convenience wrapper: x must be an array so sizeof() gives the field width
#define ITOO(x, y) itoo(x, sizeof(x), y)
  99
 // Read a tar header field of len bytes as an integer.
// A first byte with the high bit set means base-256: the remaining len-1
// bytes are a big endian binary number. Otherwise the field is octal text
// with optional leading spaces, ended by NUL, space, or the end of the field.
// Calls error_exit("bad header") on any other trailing character.
static unsigned long long otoi(char *str, unsigned len)
{
  // Bytes are examined as unsigned so values above 127 never sign extend
  unsigned char *s = (unsigned char *)str;
  unsigned long long val = 0;

  // When tar value too big for octal, use binary encoding with high bit set
  if (128&*s) while (--len) val = (val<<8)+*++s;
  else {
    // Leading spaces use up field width too, so we can't run into the
    // next field of the header.
    while (len && *s == ' ') s++, len--;
    while (len && *s>='0' && *s<='7') val = val*8+*s++-'0', len--;
    if (len && *s && *s != ' ') error_exit("bad header");
  }

  return val;
}
// Convenience wrapper: x must be an array so sizeof() gives the field width
#define OTOI(x) otoi(x, sizeof(x))
 116
 117static void write_longname(char *name, char type)
 118{
 119  struct tar_hdr tmp;
 120  int sz = strlen(name) +1;
 121
 122  memset(&tmp, 0, sizeof(tmp));
 123  strcpy(tmp.name, "././@LongLink");
 124  ITOO(tmp.uid, 0);
 125  ITOO(tmp.gid, 0);
 126  ITOO(tmp.size, sz);
 127  ITOO(tmp.mtime, 0);
 128  tmp.type = type;
 129  strcpy(tmp.magic, "ustar  ");
 130
 131  // Historical nonsense to match other implementations. Never used.
 132  ITOO(tmp.mode, 0644);
 133  strcpy(tmp.uname, "root");
 134  strcpy(tmp.gname, "root");
 135
 136  // Calculate checksum. Since 512*255 = 0377000 in octal, this can never
 137  // use more than 6 digits. The last byte is ' ' for historical reasons.
 138  itoo(tmp.chksum, sizeof(tmp.chksum)-1, tar_cksum(&tmp));
 139  tmp.chksum[7] = ' ';
 140
 141  // write header and name, padded with NUL to block size
 142  xwrite(TT.fd, &tmp, 512);
 143  xwrite(TT.fd, name, sz);
 144  if (sz%512) xwrite(TT.fd, toybuf, 512-(sz%512));
 145}
 146
 147static struct double_list *filter(struct double_list *lst, char *name)
 148{
 149  struct double_list *end = lst;
 150
 151  if (lst)
 152    // constant is FNM_LEADING_DIR
 153    do if (!fnmatch(lst->data, name, 1<<3)) return lst;
 154    while (end != (lst = lst->next));
 155
 156  return 0;
 157}
 158
 159static void skippy(long long len)
 160{
 161  if (lskip(TT.fd, len)) perror_exit("EOF");
 162}
 163
 164// allocate and read data from TT.fd
 165static void alloread(void *buf, int len)
 166{
 167  // actually void **, but automatic typecasting doesn't work with void ** :(
 168  char **b = buf;
 169
 170  free(*b);
 171  *b = xmalloc(len+1);
 172  xreadall(TT.fd, *b, len);
 173  (*b)[len] = 0;
 174}
 175
 176// callback from dirtree to create archive
 177static int add_to_tar(struct dirtree *node)
 178{
 179  struct stat *st = &(node->st);
 180  struct tar_hdr hdr;
 181  struct passwd *pw = pw;
 182  struct group *gr = gr;
 183  int i, fd = -1, norecurse = FLAG(no_recursion);
 184  char *name, *lnk, *hname;
 185
 186  if (!dirtree_notdotdot(node)) return 0;
 187  if (TT.adev == st->st_dev && TT.aino == st->st_ino) {
 188    error_msg("'%s' file is the archive; not dumped", node->name);
 189    return 0;
 190  }
 191
 192  i = 1;
 193  name = hname = dirtree_path(node, &i);
 194
 195  // exclusion defaults to --no-anchored and --wildcards-match-slash
 196  for (lnk = name; *lnk;) {
 197    if (filter(TT.excl, lnk)) {
 198      norecurse++;
 199
 200      goto done;
 201    }
 202    while (*lnk && *lnk!='/') lnk++;
 203    while (*lnk=='/') lnk++;
 204  }
 205
 206  // Consume the 1 extra byte alocated in dirtree_path()
 207  if (S_ISDIR(st->st_mode) && name[i-1] != '/') strcat(name, "/");
 208
 209  // remove leading / and any .. entries from saved name
 210  if (!FLAG(P)) while (*hname == '/') hname++;
 211  for (lnk = hname;;) {
 212    if (!(lnk = strstr(lnk, ".."))) break;
 213    if (lnk == hname || lnk[-1] == '/') {
 214      if (!lnk[2]) goto done;
 215      if (lnk[2]=='/') lnk = hname = lnk+3;
 216    } else lnk+= 2;
 217  }
 218  if (!*hname) goto done;
 219
 220  if (TT.warn && hname != name) {
 221    fprintf(stderr, "removing leading '%.*s' from member names\n",
 222           (int)(hname-name), name);
 223    TT.warn = 0;
 224  }
 225
 226  if (TT.owner) st->st_uid = TT.ouid;
 227  if (TT.group) st->st_gid = TT.ggid;
 228  if (TT.mode) st->st_mode = string_to_mode(TT.mode, st->st_mode);
 229  if (TT.mtime) st->st_mtime = TT.mtt;
 230
 231  memset(&hdr, 0, sizeof(hdr));
 232  strncpy(hdr.name, hname, sizeof(hdr.name));
 233  ITOO(hdr.mode, st->st_mode &07777);
 234  ITOO(hdr.uid, st->st_uid);
 235  ITOO(hdr.gid, st->st_gid);
 236  ITOO(hdr.size, 0); //set size later
 237  ITOO(hdr.mtime, st->st_mtime);
 238  strcpy(hdr.magic, "ustar  ");
 239
 240  // Hard link or symlink? i=0 neither, i=1 hardlink, i=2 symlink
 241
 242  // Are there hardlinks to a non-directory entry?
 243  if (st->st_nlink>1 && !S_ISDIR(st->st_mode)) {
 244    // Have we seen this dev&ino before?
 245    for (i = 0; i<TT.hlc; i++) {
 246      if (st->st_ino == TT.hlx[i].ino && st->st_dev == TT.hlx[i].dev)
 247        break;
 248    }
 249    if (i != TT.hlc) {
 250      lnk = TT.hlx[i].arg;
 251      i = 1;
 252    } else {
 253      // first time we've seen it. Store as normal file, but remember it.
 254      if (!(TT.hlc&255))
 255        TT.hlx = xrealloc(TT.hlx, sizeof(*TT.hlx)*(TT.hlc+256));
 256      TT.hlx[TT.hlc].arg = xstrdup(hname);
 257      TT.hlx[TT.hlc].ino = st->st_ino;
 258      TT.hlx[TT.hlc].dev = st->st_dev;
 259      TT.hlc++;
 260      i = 0;
 261    }
 262  } else i = 0;
 263
 264  // Handle file types
 265  if (i || S_ISLNK(st->st_mode)) {
 266    hdr.type = '1'+!i;
 267    if (!i && !(lnk = xreadlink(name))) {
 268      perror_msg("readlink");
 269      goto done;
 270    }
 271    if (strlen(lnk) > sizeof(hdr.link)) write_longname(lnk, 'K');
 272    strncpy(hdr.link, lnk, sizeof(hdr.link));
 273    if (!i) free(lnk);
 274  } else if (S_ISREG(st->st_mode)) {
 275    hdr.type = '0';
 276    ITOO(hdr.size, st->st_size);
 277  } else if (S_ISDIR(st->st_mode)) hdr.type = '5';
 278  else if (S_ISFIFO(st->st_mode)) hdr.type = '6';
 279  else if (S_ISBLK(st->st_mode) || S_ISCHR(st->st_mode)) {
 280    hdr.type = (S_ISCHR(st->st_mode))?'3':'4';
 281    ITOO(hdr.major, dev_major(st->st_rdev));
 282    ITOO(hdr.minor, dev_minor(st->st_rdev));
 283  } else {
 284    error_msg("unknown file type '%o'", st->st_mode & S_IFMT);
 285    goto done;
 286  }
 287
 288  if (strlen(hname) > sizeof(hdr.name)) write_longname(hname, 'L');
 289
 290  if (!FLAG(numeric_owner)) {
 291    if (TT.owner || (pw = bufgetpwuid(st->st_uid)))
 292      strncpy(hdr.uname, TT.owner ? TT.owner : pw->pw_name, sizeof(hdr.uname));
 293    if (TT.group || (gr = bufgetgrgid(st->st_gid)))
 294      strncpy(hdr.gname, TT.group ? TT.group : gr->gr_name, sizeof(hdr.gname));
 295  }
 296
 297  TT.sparselen = 0;
 298  if (hdr.type == '0') {
 299    // Before we write the header, make sure we can read the file
 300    if ((fd = open(name, O_RDONLY)) < 0) {
 301      perror_msg("can't open '%s'", name);
 302
 303      return 0;
 304    }
 305    if (FLAG(S)) {
 306      long long lo, ld = 0, len = 0;
 307
 308      // Enumerate the extents
 309      while ((lo = lseek(fd, ld, SEEK_HOLE)) != -1) {
 310        if (!(TT.sparselen&511))
 311          TT.sparse = xrealloc(TT.sparse, (TT.sparselen+514)*sizeof(long long));
 312        if (ld != lo) {
 313          TT.sparse[TT.sparselen++] = ld;
 314          len += TT.sparse[TT.sparselen++] = lo-ld;
 315        }
 316        if (lo == st->st_size || (ld = lseek(fd, lo, SEEK_DATA)) < lo) break;
 317      }
 318
 319      // If there were extents, change type to S record
 320      if (TT.sparselen>2) {
 321        TT.sparse[TT.sparselen++] = st->st_size;
 322        TT.sparse[TT.sparselen++] = 0;
 323        hdr.type = 'S';
 324        lnk = (char *)&hdr;
 325        for (i = 0; i<TT.sparselen && i<8; i++)
 326          itoo(lnk+386+12*i, 12, TT.sparse[i]);
 327
 328        // Record if there's overflow records, change length to sparse length,
 329        // record apparent length
 330        if (TT.sparselen>8) lnk[482] = 1;
 331        itoo(lnk+483, 12, st->st_size);
 332        ITOO(hdr.size, len);
 333      } else TT.sparselen = 0;
 334      lseek(fd, 0, SEEK_SET);
 335    }
 336  }
 337
 338  itoo(hdr.chksum, sizeof(hdr.chksum)-1, tar_cksum(&hdr));
 339  hdr.chksum[7] = ' ';
 340
 341  if (FLAG(v)) dprintf((TT.fd==1) ? 2 : 1, "%s\n", hname);
 342
 343  // Write header and data to archive
 344  xwrite(TT.fd, &hdr, 512);
 345  if (TT.sparselen>8) {
 346    char buf[512];
 347
 348    // write extent overflow blocks
 349    for (i=8;;i++) {
 350      int j = (i-8)%42;
 351
 352      if (!j || i==TT.sparselen) {
 353        if (i!=8) {
 354          if (i!=TT.sparselen) buf[504] = 1;
 355          xwrite(TT.fd, buf, 512);
 356        }
 357        if (i==TT.sparselen) break;
 358        memset(buf, 0, sizeof(buf));
 359      }
 360      itoo(buf+12*j, 12, TT.sparse[i]);
 361    }
 362  }
 363  TT.sparselen >>= 1;
 364  if (hdr.type == '0' || hdr.type == 'S') {
 365    if (hdr.type == '0') xsendfile_pad(fd, TT.fd, st->st_size);
 366    else for (i = 0; i<TT.sparselen; i++) {
 367      if (TT.sparse[i*2] != lseek(fd, TT.sparse[i*2], SEEK_SET))
 368        perror_msg("%s: seek %lld", name, TT.sparse[i*2]);
 369      xsendfile_pad(fd, TT.fd, TT.sparse[i*2+1]);
 370    }
 371    if (st->st_size%512) writeall(TT.fd, toybuf, (512-(st->st_size%512)));
 372    close(fd);
 373  }
 374done:
 375  free(name);
 376
 377  return (DIRTREE_RECURSE|(FLAG(h)?DIRTREE_SYMFOLLOW:0))*!norecurse;
 378}
 379
 380static void wsettime(char *s, long long sec)
 381{
 382  struct timespec times[2] = {{sec, 0},{sec, 0}};
 383
 384  if (utimensat(AT_FDCWD, s, times, AT_SYMLINK_NOFOLLOW))
 385    perror_msg("settime %lld %s", sec, s);
 386}
 387
 388// Do pending directory utimes(), NULL to flush all.
 389static int dirflush(char *name, int isdir)
 390{
 391  char *s = 0, *ss;
 392
 393  // Barf if name not in TT.cwd
 394  if (name) {
 395    if (!(ss = s = xabspath(name, -1-isdir))) {
 396      error_msg("'%s' bad symlink", name);
 397
 398      return 1;
 399    }
 400    if (TT.cwd[1] && (!strstart(&ss, TT.cwd) || (*ss && *ss!='/'))) {
 401      error_msg("'%s' %s not under '%s'", name, s, TT.cwd);
 402      free(s);
 403
 404      return 1;
 405    }
 406
 407    // --restrict means first entry extracted is what everything must be under
 408    if (FLAG(restrict)) {
 409      free(TT.cwd);
 410      TT.cwd = strdup(s);
 411      toys.optflags ^= FLAG_restrict;
 412    }
 413    // use resolved name so trailing / is stripped
 414    if (isdir) unlink(s);
 415  }
 416
 417  // Set deferred utimes() for directories this file isn't under.
 418  // (Files must be depth-first ordered in tarball for this to matter.)
 419  while (TT.dirs) {
 420
 421    // If next file is under (or equal to) this dir, keep waiting
 422    if (name && strstart(&ss, ss = s) && (!*ss || *ss=='/')) break;
 423
 424    wsettime(TT.dirs->str+sizeof(long long), *(long long *)TT.dirs->str);
 425    free(llist_pop(&TT.dirs));
 426  }
 427  free(s);
 428
 429  // name was under TT.cwd
 430  return 0;
 431}
 432
 433// write data to file
 434static void sendfile_sparse(int fd)
 435{
 436  long long len, used = 0, sent;
 437  int i = 0, j;
 438
 439  do {
 440    if (TT.sparselen) {
 441      // Seek past holes or fill output with zeroes.
 442      if (-1 == lseek(fd, len = TT.sparse[i*2], SEEK_SET)) {
 443        sent = 0;
 444        while (len) {
 445          // first/last 512 bytes used, rest left zeroes
 446          j = (len>3072) ? 3072 : len;
 447          if (j != writeall(fd, toybuf+512, j)) goto error;
 448          len -= j;
 449        }
 450      } else {
 451        sent = len;
 452        if (!(len = TT.sparse[i*2+1]) && ftruncate(fd, sent+len))
 453          perror_msg("ftruncate");
 454      }
 455      if (len+used>TT.hdr.size) error_exit("sparse overflow");
 456    } else len = TT.hdr.size;
 457
 458    len -= sendfile_len(TT.fd, fd, len, &sent);
 459    used += sent;
 460    if (len) {
 461error:
 462      if (fd!=1) perror_msg(0);
 463      skippy(TT.hdr.size-used);
 464
 465      break;
 466    }
 467  } while (++i<TT.sparselen);
 468
 469  close(fd);
 470}
 471
 472static void extract_to_disk(void)
 473{
 474  char *name = TT.hdr.name;
 475  int ala = TT.hdr.mode;
 476
 477  if (dirflush(name, S_ISDIR(ala))) {
 478    if (S_ISREG(ala) && !TT.hdr.link_target) skippy(TT.hdr.size);
 479 
 480    return;
 481  }
 482
 483  // create path before file if necessary
 484  if (strrchr(name, '/') && mkpath(name) && errno!=EEXIST)
 485      return perror_msg(":%s: can't mkdir", name);
 486
 487  // remove old file, if exists
 488  if (!FLAG(k) && !S_ISDIR(ala) && unlink(name) && errno!=ENOENT)
 489    return perror_msg("can't remove: %s", name);
 490
 491  if (S_ISREG(ala)) {
 492    // hardlink?
 493    if (TT.hdr.link_target) {
 494      if (link(TT.hdr.link_target, name))
 495        return perror_msg("can't link '%s' -> '%s'", name, TT.hdr.link_target);
 496    // write contents
 497    } else {
 498      int fd = xcreate(name,
 499        WARN_ONLY|O_WRONLY|O_CREAT|(FLAG(overwrite)?O_TRUNC:O_EXCL),
 500        ala & 07777);
 501      if (fd != -1) sendfile_sparse(fd);
 502      else skippy(TT.hdr.size);
 503    }
 504  } else if (S_ISDIR(ala)) {
 505    if ((mkdir(name, 0700) == -1) && errno != EEXIST)
 506      return perror_msg("%s: can't create", TT.hdr.name);
 507  } else if (S_ISLNK(ala)) {
 508    if (symlink(TT.hdr.link_target, TT.hdr.name))
 509      return perror_msg("can't link '%s' -> '%s'", name, TT.hdr.link_target);
 510  } else if (mknod(name, ala, TT.hdr.device))
 511    return perror_msg("can't create '%s'", name);
 512
 513  // Set ownership
 514  if (!FLAG(o) && !geteuid()) {
 515    int u = TT.hdr.uid, g = TT.hdr.gid;
 516
 517    if (TT.owner) TT.hdr.uid = TT.ouid;
 518    else if (!FLAG(numeric_owner) && *TT.hdr.uname) {
 519      struct passwd *pw = getpwnam(TT.hdr.uname);
 520      if (pw && (TT.owner || !FLAG(numeric_owner))) TT.hdr.uid = pw->pw_uid;
 521    }
 522
 523    if (TT.group) TT.hdr.gid = TT.ggid;
 524    else if (!FLAG(numeric_owner) && *TT.hdr.uname) {
 525      struct group *gr = getgrnam(TT.hdr.gname);
 526      if (gr) TT.hdr.gid = gr->gr_gid;
 527    }
 528
 529    if (lchown(name, u, g)) perror_msg("chown %d:%d '%s'", u, g, name);;
 530  }
 531
 532  if (!S_ISLNK(ala)) chmod(TT.hdr.name, FLAG(p) ? ala : ala&0777);
 533
 534  // Apply mtime.
 535  if (!FLAG(m)) {
 536    if (S_ISDIR(ala)) {
 537      struct string_list *sl;
 538
 539      // Writing files into a directory changes directory timestamps, so
 540      // defer mtime updates until contents written.
 541
 542      sl = xmalloc(sizeof(struct string_list)+sizeof(long long)+strlen(name)+1);
 543      *(long long *)sl->str = TT.hdr.mtime;
 544      strcpy(sl->str+sizeof(long long), name);
 545      sl->next = TT.dirs;
 546      TT.dirs = sl;
 547    } else wsettime(TT.hdr.name, TT.hdr.mtime);
 548  }
 549}
 550
 551static void unpack_tar(char *first)
 552{
 553  struct double_list *walk, *delete;
 554  struct tar_hdr tar;
 555  int i, and = 0;
 556  unsigned maj, min;
 557  char *s;
 558
 559  for (;;) {
 560    if (first) {
 561      memcpy(&tar, first, i = 512);
 562      first = 0;
 563    } else {
 564      // align to next block and read it
 565      if (TT.hdr.size%512) skippy(512-TT.hdr.size%512);
 566      i = readall(TT.fd, &tar, 512);
 567    }
 568
 569    if (i && i!=512) error_exit("short header");
 570
 571    // Two consecutive empty headers ends tar even if there's more data
 572    if (!i || !*tar.name) {
 573      if (!i || and++) return;
 574      TT.hdr.size = 0;
 575      continue;
 576    }
 577    // ensure null temination even of pathological packets
 578    tar.padd[0] = and = 0;
 579
 580    // Is this a valid TAR header?
 581    if (!is_tar_header(&tar)) error_exit("bad header");
 582    TT.hdr.size = OTOI(tar.size);
 583
 584    // If this header isn't writing something to the filesystem
 585    if ((tar.type<'0' || tar.type>'7') && tar.type!='S'
 586        && (*tar.magic && tar.type))
 587    {
 588      // Long name extension header?
 589      if (tar.type == 'K') alloread(&TT.hdr.link_target, TT.hdr.size);
 590      else if (tar.type == 'L') alloread(&TT.hdr.name, TT.hdr.size);
 591      else if (tar.type == 'x') {
 592        char *p, *buf = 0;
 593        int i, len, n = 0;
 594
 595        // Posix extended record "LEN NAME=VALUE\n" format
 596        alloread(&buf, TT.hdr.size);
 597        for (p = buf; (p-buf)<TT.hdr.size; p += len) {
 598          i = sscanf(p, "%u path=%n", &len, &n);
 599          if (i<1 || len<4 || len>TT.hdr.size) {
 600            error_msg("bad header");
 601            break;
 602          }
 603          p[len-1] = 0;
 604          if (n) {
 605            TT.hdr.name = xstrdup(p+n);
 606            break;
 607          }
 608        }
 609        free(buf);
 610
 611      // Ignore everything else.
 612      } else skippy(TT.hdr.size);
 613
 614      continue;
 615    }
 616
 617    // Handle sparse file type
 618    if (tar.type == 'S') {
 619      char sparse[512];
 620      int max = 8;
 621
 622      // Load 4 pairs of offset/len from S block, plus 21 pairs from each
 623      // continuation block, list says where to seek/write sparse file contents
 624      TT.sparselen = 0;
 625      s = 386+(char *)&tar;
 626      *sparse = i = 0;
 627
 628      for (;;) {
 629        if (!(TT.sparselen&511))
 630          TT.sparse = xrealloc(TT.sparse, (TT.sparselen+512)*sizeof(long long));
 631
 632        // If out of data in block check continue flag, stop or load next block
 633        if (++i>max || !*s) {
 634          if (!(*sparse ? sparse[504] : ((char *)&tar)[482])) break;
 635          xreadall(TT.fd, s = sparse, 512);
 636          max = 41;
 637          i = 0;
 638        }
 639        // Load next entry
 640        TT.sparse[TT.sparselen++] = otoi(s, 12);
 641        s += 12;
 642      }
 643
 644      // Odd number of entries (from corrupted tar) would be dropped here
 645      TT.sparselen /= 2;
 646      if (TT.sparselen)
 647        TT.hdr.ssize = TT.sparse[2*TT.sparselen-1]+TT.sparse[2*TT.sparselen-2];
 648    } else {
 649      TT.sparselen = 0;
 650      TT.hdr.ssize = TT.hdr.size;
 651    }
 652
 653    // At this point, we have something to output. Convert metadata.
 654    TT.hdr.mode = OTOI(tar.mode)&0xfff;
 655    if (tar.type == 'S' || !tar.type) TT.hdr.mode |= 0x8000;
 656    else TT.hdr.mode |= (char []){8,8,10,2,6,4,1,8}[tar.type-'0']<<12;
 657    TT.hdr.uid = OTOI(tar.uid);
 658    TT.hdr.gid = OTOI(tar.gid);
 659    TT.hdr.mtime = OTOI(tar.mtime);
 660    maj = OTOI(tar.major);
 661    min = OTOI(tar.minor);
 662    TT.hdr.device = dev_makedev(maj, min);
 663    TT.hdr.uname = xstrndup(TT.owner ? TT.owner : tar.uname, sizeof(tar.uname));
 664    TT.hdr.gname = xstrndup(TT.group ? TT.group : tar.gname, sizeof(tar.gname));
 665
 666    if (TT.owner) TT.hdr.uid = TT.ouid;
 667    else if (!FLAG(numeric_owner)) {
 668      struct passwd *pw = getpwnam(TT.hdr.uname);
 669      if (pw && (TT.owner || !FLAG(numeric_owner))) TT.hdr.uid = pw->pw_uid;
 670    }
 671
 672    if (TT.group) TT.hdr.gid = TT.ggid;
 673    else if (!FLAG(numeric_owner)) {
 674      struct group *gr = getgrnam(TT.hdr.gname);
 675      if (gr) TT.hdr.gid = gr->gr_gid;
 676    }
 677
 678    if (!TT.hdr.link_target && *tar.link)
 679      TT.hdr.link_target = xstrndup(tar.link, sizeof(tar.link));
 680    if (!TT.hdr.name) {
 681      // Glue prefix and name fields together with / if necessary
 682      i = (tar.type=='S') ? 0 : strnlen(tar.prefix, sizeof(tar.prefix));
 683      TT.hdr.name = xmprintf("%.*s%s%.*s", i, tar.prefix,
 684        (i && tar.prefix[i-1] != '/') ? "/" : "",
 685        (int)sizeof(tar.name), tar.name);
 686    }
 687
 688    // Old broken tar recorded dir as "file with trailing slash"
 689    if (S_ISREG(TT.hdr.mode) && (s = strend(TT.hdr.name, "/"))) {
 690      *s = 0;
 691      TT.hdr.mode = (TT.hdr.mode & ~S_IFMT) | S_IFDIR;
 692    }
 693
 694    // Non-regular files don't have contents stored in archive.
 695    if ((TT.hdr.link_target && *TT.hdr.link_target)
 696      || (tar.type && !S_ISREG(TT.hdr.mode)))
 697        TT.hdr.size = 0;
 698
 699    // Files are seen even if excluded, so check them here.
 700    // TT.seen points to first seen entry in TT.incl, or NULL if none yet.
 701
 702    if ((delete = filter(TT.incl, TT.hdr.name)) && TT.incl != TT.seen) {
 703      if (!TT.seen) TT.seen = delete;
 704
 705      // Move seen entry to end of list.
 706      if (TT.incl == delete) TT.incl = TT.incl->next;
 707      else for (walk = TT.incl; walk != TT.seen; walk = walk->next) {
 708        if (walk == delete) {
 709          dlist_pop(&walk);
 710          dlist_add_nomalloc(&TT.incl, delete);
 711        }
 712      }
 713    }
 714
 715    // Skip excluded files
 716    if (filter(TT.excl, TT.hdr.name) || (TT.incl && !delete))
 717      skippy(TT.hdr.size);
 718    else if (FLAG(t)) {
 719      if (FLAG(v)) {
 720        struct tm *lc = localtime(TT.mtime ? &TT.mtt : &TT.hdr.mtime);
 721        char perm[12], gname[12];
 722
 723        mode_to_string(TT.hdr.mode, perm);
 724        printf("%s", perm);
 725        sprintf(perm, "%u", TT.hdr.uid);
 726        sprintf(gname, "%u", TT.hdr.gid);
 727        printf(" %s/%s ", *TT.hdr.uname ? TT.hdr.uname : perm,
 728          *TT.hdr.gname ? TT.hdr.gname : gname);
 729        if (tar.type=='3' || tar.type=='4') printf("%u,%u", maj, min);
 730        else printf("%9lld", TT.hdr.ssize);
 731        sprintf(perm, ":%02d", lc->tm_sec);
 732        printf("  %d-%02d-%02d %02d:%02d%s ", 1900+lc->tm_year, 1+lc->tm_mon,
 733          lc->tm_mday, lc->tm_hour, lc->tm_min, FLAG(full_time) ? perm : "");
 734      }
 735      printf("%s", TT.hdr.name);
 736      if (TT.hdr.link_target) printf(" -> %s", TT.hdr.link_target);
 737      xputc('\n');
 738      skippy(TT.hdr.size);
 739    } else {
 740      if (FLAG(v)) printf("%s\n", TT.hdr.name);
 741      if (FLAG(O)) sendfile_sparse(1);
 742      else if (FLAG(to_command)) {
 743        if (S_ISREG(TT.hdr.mode)) {
 744          int fd, pid;
 745
 746          xsetenv("TAR_FILETYPE", "f");
 747          xsetenv(xmprintf("TAR_MODE=%o", TT.hdr.mode), 0);
 748          xsetenv(xmprintf("TAR_SIZE=%lld", TT.hdr.ssize), 0);
 749          xsetenv("TAR_FILENAME", TT.hdr.name);
 750          xsetenv("TAR_UNAME", TT.hdr.uname);
 751          xsetenv("TAR_GNAME", TT.hdr.gname);
 752          xsetenv(xmprintf("TAR_MTIME=%llo", (long long)TT.hdr.mtime), 0);
 753          xsetenv(xmprintf("TAR_UID=%o", TT.hdr.uid), 0);
 754          xsetenv(xmprintf("TAR_GID=%o", TT.hdr.gid), 0);
 755
 756          pid = xpopen((char *[]){"sh", "-c", TT.to_command, NULL}, &fd, 0);
 757          // todo: short write exits tar here, other skips data.
 758          sendfile_sparse(fd);
 759          fd = xpclose_both(pid, 0);
 760          if (fd) error_msg("%d: Child returned %d", pid, fd);
 761        }
 762      } else extract_to_disk();
 763    }
 764
 765    free(TT.hdr.name);
 766    free(TT.hdr.link_target);
 767    free(TT.hdr.uname);
 768    free(TT.hdr.gname);
 769    TT.hdr.name = TT.hdr.link_target = 0;
 770  }
 771}
 772
 // Append a private copy of pline to the dlist ** passed as list, with one
// trailing newline and then every trailing '/' removed from the copy.
static void trim2list(void *list, char *pline)
{
  char *copy = xstrdup(pline), *end = copy+strlen(copy);

  if (end != copy && end[-1] == '\n') end--;
  while (end != copy && end[-1] == '/') end--;
  *end = 0;

  dlist_add(list, copy);
}
 784
 785// do_lines callback, selects TT.incl or TT.excl based on call order
 786static void do_XT(char **pline, long len)
 787{
 788  if (pline) trim2list(TT.X ? &TT.excl : &TT.incl, *pline);
 789}
 790
 791void tar_main(void)
 792{
 793  char *s, **args = toys.optargs,
 794    *archiver = FLAG(I) ? TT.I : (FLAG(z) ? "gzip" : (FLAG(J) ? "xz":"bzip2"));
 795  int len = 0;
 796
 797  // Needed when extracting to command
 798  signal(SIGPIPE, SIG_IGN);
 799
 800  // Get possible early errors out of the way
 801  if (!geteuid()) toys.optflags |= FLAG_p;
 802  if (TT.owner) TT.ouid = xgetuid(TT.owner);
 803  if (TT.group) TT.ggid = xgetgid(TT.group);
 804  if (TT.mtime) xparsedate(TT.mtime, &TT.mtt, (void *)&s, 1);
 805
 806  // Collect file list.
 807  for (; TT.exclude; TT.exclude = TT.exclude->next)
 808    trim2list(&TT.excl, TT.exclude->arg);
 809  for (;TT.X; TT.X = TT.X->next) do_lines(xopenro(TT.X->arg), '\n', do_XT);
 810  for (args = toys.optargs; *args; args++) trim2list(&TT.incl, *args);
 811  for (;TT.T; TT.T = TT.T->next) do_lines(xopenro(TT.T->arg), '\n', do_XT);
 812
 813  // If include file list empty, don't create empty archive
 814  if (FLAG(c)) {
 815    if (!TT.incl) error_exit("empty archive");
 816    TT.fd = 1;
 817  }
 818
 819  // nommu reentry for nonseekable input skips this, parent did it for us
 820  if (toys.stacktop) {
 821    if (TT.f && strcmp(TT.f, "-"))
 822      TT.fd = xcreate(TT.f, TT.fd*(O_WRONLY|O_CREAT|O_TRUNC), 0666);
 823    // Get destination directory
 824    if (TT.C) xchdir(TT.C);
 825  }
 826
 827  // Get destination directory
 828  TT.cwd = xabspath(s = xgetcwd(), 1);
 829  free(s);
 830
 831  // Remember archive inode so we don't overwrite it or add it to itself
 832  {
 833    struct stat st;
 834
 835    if (!fstat(TT.fd, &st)) {
 836      TT.aino = st.st_ino;
 837      TT.adev = st.st_dev;
 838    }
 839  }
 840
 841  // Are we reading?
 842  if (FLAG(x)||FLAG(t)) {
 843    char *hdr = 0;
 844
 845    // autodetect compression type when not specified
 846    if (!(FLAG(j)||FLAG(z)||FLAG(I)||FLAG(J))) {
 847      len = xread(TT.fd, hdr = toybuf+sizeof(toybuf)-512, 512);
 848      if (len!=512 || !is_tar_header(hdr)) {
 849        // detect gzip and bzip signatures
 850        if (SWAP_BE16(*(short *)hdr)==0x1f8b) toys.optflags |= FLAG_z;
 851        else if (!memcmp(hdr, "BZh", 3)) toys.optflags |= FLAG_j;
 852        else if (peek_be(hdr, 7) == 0xfd377a585a0000UL) toys.optflags |= FLAG_J;
 853        else error_exit("Not tar");
 854
 855        // if we can seek back we don't need to loop and copy data
 856        if (!lseek(TT.fd, -len, SEEK_CUR)) hdr = 0;
 857      }
 858    }
 859
 860    if (FLAG(j)||FLAG(z)||FLAG(I)||FLAG(J)) {
 861      int pipefd[2] = {hdr ? -1 : TT.fd, -1}, i, pid;
 862      struct string_list *zcat = FLAG(I) ? 0 : find_in_path(getenv("PATH"),
 863        FLAG(j) ? "bzcat" : FLAG(J) ? "xzcat" : "zcat");
 864
 865      // Toybox provides more decompressors than compressors, so try them first
 866      xpopen_both(zcat ? (char *[]){zcat->str, 0} :
 867        (char *[]){archiver, "-d", 0}, pipefd);
 868      if (CFG_TOYBOX_FREE) llist_traverse(zcat, free);
 869
 870      if (!hdr) {
 871        // If we could seek, child gzip inherited fd and we read its output
 872        close(TT.fd);
 873        TT.fd = pipefd[1];
 874
 875      } else {
 876
 877        // If we autodetected type but then couldn't lseek to put the data back
 878        // we have to loop reading data from TT.fd and pass it to gzip ourselves
 879        // (starting with the block of data we read to autodetect).
 880
 881        // dirty trick: move gzip input pipe to stdin so child closes spare copy
 882        dup2(pipefd[0], 0);
 883        if (pipefd[0]) close(pipefd[0]);
 884
 885        // Fork a copy of ourselves to handle extraction (reads from zip output
 886        // pipe, writes to stdout).
 887        pipefd[0] = pipefd[1];
 888        pipefd[1] = 1;
 889        pid = xpopen_both(0, pipefd);
 890        close(pipefd[1]);
 891
 892        // loop writing collated data to zip proc
 893        xwrite(0, hdr, len);
 894        for (;;) {
 895          if ((i = read(TT.fd, toybuf, sizeof(toybuf)))<1) {
 896            close(0);
 897            xwaitpid(pid);
 898            return;
 899          }
 900          xwrite(0, toybuf, i);
 901        }
 902      }
 903    }
 904
 905    unpack_tar(hdr);
 906    dirflush(0, 0);
 907
 908    // Each time a TT.incl entry is seen it's moved to the end of the list,
 909    // with TT.seen pointing to first seen list entry. Anything between
 910    // TT.incl and TT.seen wasn't encountered in archive..
 911    if (TT.seen != TT.incl) {
 912      if (!TT.seen) TT.seen = TT.incl;
 913      while (TT.incl != TT.seen) {
 914        error_msg("'%s' not in archive", TT.incl->data);
 915        TT.incl = TT.incl->next;
 916      }
 917    }
 918
 919  // are we writing? (Don't have to test flag here, one of 3 must be set)
 920  } else {
 921    struct double_list *dl = TT.incl;
 922
 923    // autodetect compression type based on -f name. (Use > to avoid.)
 924    if (TT.f && !FLAG(j) && !FLAG(z) && !FLAG(I) && !FLAG(J)) {
 925      char *tbz[] = {".tbz", ".tbz2", ".tar.bz", ".tar.bz2"};
 926      if (strend(TT.f, ".tgz") || strend(TT.f, ".tar.gz"))
 927        toys.optflags |= FLAG_z;
 928      if (strend(TT.f, ".txz") || strend(TT.f, ".tar.xz"))
 929        toys.optflags |= FLAG_J;
 930      else for (len = 0; len<ARRAY_LEN(tbz); len++)
 931        if (strend(TT.f, tbz[len])) toys.optflags |= FLAG_j;
 932    }
 933
 934    if (FLAG(j)||FLAG(z)||FLAG(I)||FLAG(J)) {
 935      int pipefd[2] = {-1, TT.fd};
 936
 937      xpopen_both((char *[]){archiver, 0}, pipefd);
 938      close(TT.fd);
 939      TT.fd = pipefd[0];
 940    }
 941    do {
 942      TT.warn = 1;
 943      dirtree_flagread(dl->data, FLAG(h)?DIRTREE_SYMFOLLOW:0, add_to_tar);
 944    } while (TT.incl != (dl = dl->next));
 945
 946    writeall(TT.fd, toybuf, 1024);
 947  }
 948
 949  if (CFG_TOYBOX_FREE) {
 950    llist_traverse(TT.excl, llist_free_double);
 951    llist_traverse(TT.incl, llist_free_double);
 952    while(TT.hlc) free(TT.hlx[--TT.hlc].arg);
 953    free(TT.hlx);
 954    free(TT.cwd);
 955    close(TT.fd);
 956  }
 957}
 958