toybox/toys/posix/tar.c
<<
>>
Prefs
   1/* tar.c - create/extract archives
   2 *
   3 * Copyright 2014 Ashwini Kumar <ak.ashwini81@gmail.com>
   4 *
   5 * For the command, see
   6 *   http://pubs.opengroup.org/onlinepubs/007908799/xcu/tar.html
   7 * For the modern file format, see
   8 *   http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06
   9 *   https://en.wikipedia.org/wiki/Tar_(computing)#File_format
  10 *   https://www.gnu.org/software/tar/manual/html_node/Tar-Internals.html
  11 *
  12 * For writing to external program
  13 * http://www.gnu.org/software/tar/manual/html_node/Writing-to-an-External-Program.html
  14 *
  15 * Toybox will never implement the "pax" command as a matter of policy.
  16 *
  17 * Why --exclude pattern but no --include? tar cvzf a.tgz dir --include '*.txt'
  18 *
  19 * No --no-null because the args infrastructure isn't ready.
  20 *
  21
  22USE_TAR(NEWTOY(tar, "&(show-transformed-names)(selinux)(restrict)(full-time)(no-recursion)(null)(numeric-owner)(no-same-permissions)(overwrite)(exclude)*(mode):(mtime):(group):(owner):(to-command):~(strip-components)(strip)#~(transform)(xform)*o(no-same-owner)p(same-permissions)k(keep-old)c(create)|h(dereference)x(extract)|t(list)|v(verbose)J(xz)j(bzip2)z(gzip)S(sparse)O(to-stdout)P(absolute-names)m(touch)X(exclude-from)*T(files-from)*I(use-compress-program):C(directory):f(file):a[!txc][!jzJa]", TOYFLAG_USR|TOYFLAG_BIN))
  23
  24config TAR
  25  bool "tar"
  26  default y
  27  help
  28    usage: tar [-cxt] [-fvohmjkOS] [-XTCf NAME] [--selinux] [FILE...]
  29
  30    Create, extract, or list files in a .tar (or compressed t?z) file.
  31
  32    Options:
  33    c  Create                x  Extract               t  Test (list)
  34    f  tar FILE (default -)  C  Change to DIR first   v  Verbose display
  35    o  Ignore owner          h  Follow symlinks       m  Ignore mtime
  36    J  xz compression        j  bzip2 compression     z  gzip compression
  37    O  Extract to stdout     X  exclude names in FILE T  include names in FILE
  38
  39    --exclude        FILENAME to exclude  --full-time         Show seconds with -tv
  40    --mode MODE      Adjust permissions   --owner NAME[:UID]  Set file ownership
  41    --mtime TIME     Override timestamps  --group NAME[:GID]  Set file group
  42    --sparse         Record sparse files  --selinux           Save/restore labels
  43    --restrict       All under one dir    --no-recursion      Skip dir contents
  44    --numeric-owner  Use numeric uid/gid, not user/group names
  45    --null           Filenames in -T FILE are null-separated, not newline
  46    --strip-components NUM  Ignore first NUM directory components when extracting
  47    --xform=SED      Modify filenames via SED expression (ala s/find/replace/g)
  48    -I PROG          Filter through PROG to compress or PROG -d to decompress
  49*/
  50
  51#define FOR_tar
  52#include "toys.h"
  53
  54GLOBALS(
  55  char *f, *C, *I;
  56  struct arg_list *T, *X, *xform;
  57  long strip;
  58  char *to_command, *owner, *group, *mtime, *mode;
  59  struct arg_list *exclude;
  60
  61  struct double_list *incl, *excl, *seen;
  62  struct string_list *dirs;
  63  char *cwd, **xfsed;
  64  int fd, ouid, ggid, hlc, warn, sparselen, pid;
  65  struct dev_ino archive_di;
  66  long long *sparse;
  67  time_t mtt;
  68
  69  // hardlinks seen so far (hlc many)
  70  struct {
  71    char *arg;
  72    struct dev_ino di;
  73  } *hlx;
  74
  75  // Parsed information about a tar header.
  76  struct tar_header {
  77    char *name, *link_target, *uname, *gname;
  78    long long size, ssize;
  79    uid_t uid;
  80    gid_t gid;
  81    mode_t mode;
  82    time_t mtime;
  83    dev_t device;
  84  } hdr;
  85)
  86
  87// The on-disk 512 byte record structure.
  88struct tar_hdr {
  89  char name[100], mode[8], uid[8], gid[8], size[12], mtime[12], chksum[8],
  90       type, link[100], magic[8], uname[32], gname[32], major[8], minor[8],
  91       prefix[155], padd[12];
  92};
  93
  94// Tar uses ASCII octal when it fits, base-256 otherwise.
  95static int ascii_fits(unsigned long long val, int len)
  96{
  97  return !(val>>(3*(len-1)));
  98}
  99
 100// convert from int to octal (or base-256)
 101static void itoo(char *str, int len, unsigned long long val)
 102{
 103  if (ascii_fits(val, len)) sprintf(str, "%0*llo", len-1, val);
 104  else {
 105    for (str += len; len--; val >>= 8) *--str = val;
 106    *str = 128;
 107  }
 108}
 109#define ITOO(x, y) itoo(x, sizeof(x), y)
 110
 111// convert octal (or base-256) to int
 112static unsigned long long otoi(char *str, unsigned len)
 113{
 114  unsigned long long val = 0;
 115
 116  // When tar value too big or octal, use binary encoding with high bit set
 117  if (128&*str) while (--len) {
 118    if (val<<8 < val) error_exit("bad header");
 119    val = (val<<8)+*++str;
 120  } else {
 121    while (len && *str == ' ') str++;
 122    while (len && *str>='0' && *str<='7') val = val*8+*str++-'0', len--;
 123    if (len && *str && *str != ' ') error_exit("bad header");
 124  }
 125
 126  return val;
 127}
 128#define OTOI(x) otoi(x, sizeof(x))
 129
 130static void write_prefix_block(char *data, int len, char type)
 131{
 132  struct tar_hdr tmp;
 133
 134  memset(&tmp, 0, sizeof(tmp));
 135  sprintf(tmp.name, "././@%s", type=='x' ? "PaxHeaders" : "LongLink");
 136  ITOO(tmp.uid, 0);
 137  ITOO(tmp.gid, 0);
 138  ITOO(tmp.size, len);
 139  ITOO(tmp.mtime, 0);
 140  tmp.type = type;
 141  strcpy(tmp.magic, "ustar  ");
 142
 143  // Historical nonsense to match other implementations. Never used.
 144  ITOO(tmp.mode, 0644);
 145  strcpy(tmp.uname, "root");
 146  strcpy(tmp.gname, "root");
 147
 148  // Calculate checksum. Since 512*255 = 0377000 in octal, this can never
 149  // use more than 6 digits. The last byte is ' ' for historical reasons.
 150  itoo(tmp.chksum, sizeof(tmp.chksum)-1, tar_cksum(&tmp));
 151  tmp.chksum[7] = ' ';
 152
 153  // write header and name, padded with NUL to block size
 154  xwrite(TT.fd, &tmp, 512);
 155  xwrite(TT.fd, data, len);
 156  if (len%512) xwrite(TT.fd, toybuf, 512-(len%512));
 157}
 158
 159static void maybe_prefix_block(char *data, int check, int type)
 160{
 161  int len = strlen(data);
 162
 163  if (len>check) write_prefix_block(data, len+1, type);
 164}
 165
 166static struct double_list *filter(struct double_list *lst, char *name)
 167{
 168  struct double_list *end = lst;
 169
 170  if (lst)
 171    // constant is FNM_LEADING_DIR
 172    do if (!fnmatch(lst->data, name, 1<<3)) return lst;
 173    while (end != (lst = lst->next));
 174
 175  return 0;
 176}
 177
 178static void skippy(long long len)
 179{
 180  if (lskip(TT.fd, len)) perror_exit("EOF");
 181}
 182
 183// allocate and read data from TT.fd
 184static void alloread(void *buf, int len)
 185{
 186  // actually void **, but automatic typecasting doesn't work with void ** :(
 187  char **b = buf;
 188
 189  free(*b);
 190  *b = xmalloc(len+1);
 191  xreadall(TT.fd, *b, len);
 192  (*b)[len] = 0;
 193}
 194
 195// callback from dirtree to create archive
 196static int add_to_tar(struct dirtree *node)
 197{
 198  struct stat *st = &(node->st);
 199  struct tar_hdr hdr;
 200  struct passwd *pw = pw;
 201  struct group *gr = gr;
 202  int i, fd = -1, recurse = 0;
 203  char *name, *lnk, *hname, *xfname = 0;
 204
 205  if (!dirtree_notdotdot(node)) return 0;
 206  if (same_dev_ino(st, &TT.archive_di)) {
 207    error_msg("'%s' file is the archive; not dumped", node->name);
 208    return 0;
 209  }
 210
 211  i = 1;
 212  name = hname = dirtree_path(node, &i);
 213
 214  // exclusion defaults to --no-anchored and --wildcards-match-slash
 215  for (lnk = name; *lnk;) {
 216    if (filter(TT.excl, lnk)) goto done;
 217    while (*lnk && *lnk!='/') lnk++;
 218    while (*lnk=='/') lnk++;
 219  }
 220
 221  // Consume the 1 extra byte alocated in dirtree_path()
 222  if (S_ISDIR(st->st_mode) && name[i-1] != '/') strcat(name, "/");
 223
 224  // remove leading / and any .. entries from saved name
 225  if (!FLAG(P)) while (*hname == '/') hname++;
 226  for (lnk = hname;;) {
 227    if (!(lnk = strstr(lnk, ".."))) break;
 228    if (lnk == hname || lnk[-1] == '/') {
 229      if (!lnk[2]) goto done;
 230      if (lnk[2]=='/') {
 231        lnk = hname = lnk+3;
 232        continue;
 233      }
 234    }
 235    lnk += 2;
 236  }
 237  if (!*hname) goto done;
 238
 239  if (TT.warn && hname != name) {
 240    dprintf(2, "removing leading '%.*s' from member names\n",
 241           (int)(hname-name), name);
 242    TT.warn = 0;
 243  }
 244
 245  // Note: linux sed doesn't add newline, so no need to remove it or use -z.
 246  if (TT.xfsed)
 247    if (!(hname = xfname = xrunread(TT.xfsed, hname))) error_exit("bad xform");
 248
 249  if (TT.owner) st->st_uid = TT.ouid;
 250  if (TT.group) st->st_gid = TT.ggid;
 251  if (TT.mode) st->st_mode = string_to_mode(TT.mode, st->st_mode);
 252  if (TT.mtime) st->st_mtime = TT.mtt;
 253
 254  memset(&hdr, 0, sizeof(hdr));
 255  strncpy(hdr.name, hname, sizeof(hdr.name));
 256  ITOO(hdr.mode, st->st_mode &07777);
 257  ITOO(hdr.uid, st->st_uid);
 258  ITOO(hdr.gid, st->st_gid);
 259  ITOO(hdr.size, 0); //set size later
 260  ITOO(hdr.mtime, st->st_mtime);
 261  strcpy(hdr.magic, "ustar  ");
 262
 263  // Hard link or symlink? i=0 neither, i=1 hardlink, i=2 symlink
 264
 265  // Are there hardlinks to a non-directory entry?
 266  if (st->st_nlink>1 && !S_ISDIR(st->st_mode)) {
 267    // Have we seen this dev&ino before?
 268    for (i = 0; i<TT.hlc; i++) if (same_dev_ino(st, &TT.hlx[i].di)) break;
 269    if (i != TT.hlc) {
 270      lnk = TT.hlx[i].arg;
 271      i = 1;
 272    } else {
 273      // first time we've seen it. Store as normal file, but remember it.
 274      if (!(TT.hlc&255))
 275        TT.hlx = xrealloc(TT.hlx, sizeof(*TT.hlx)*(TT.hlc+256));
 276      TT.hlx[TT.hlc].arg = xstrdup(hname);
 277      TT.hlx[TT.hlc].di.ino = st->st_ino;
 278      TT.hlx[TT.hlc].di.dev = st->st_dev;
 279      TT.hlc++;
 280      i = 0;
 281    }
 282  } else i = 0;
 283
 284  // Handle file types
 285  if (i || S_ISLNK(st->st_mode)) {
 286    hdr.type = '1'+!i;
 287    if (!i && !(lnk = xreadlink(name))) {
 288      perror_msg("readlink");
 289      goto done;
 290    }
 291    maybe_prefix_block(lnk, sizeof(hdr.link), 'K');
 292    strncpy(hdr.link, lnk, sizeof(hdr.link));
 293    if (!i) free(lnk);
 294  } else if (S_ISREG(st->st_mode)) {
 295    hdr.type = '0';
 296    ITOO(hdr.size, st->st_size);
 297  } else if (S_ISDIR(st->st_mode)) hdr.type = '5';
 298  else if (S_ISFIFO(st->st_mode)) hdr.type = '6';
 299  else if (S_ISBLK(st->st_mode) || S_ISCHR(st->st_mode)) {
 300    hdr.type = (S_ISCHR(st->st_mode))?'3':'4';
 301    ITOO(hdr.major, dev_major(st->st_rdev));
 302    ITOO(hdr.minor, dev_minor(st->st_rdev));
 303  } else {
 304    error_msg("unknown file type '%o'", st->st_mode & S_IFMT);
 305    goto done;
 306  }
 307
 308  // write out 'x' prefix header for --selinux data
 309  if (FLAG(selinux)) {
 310    int start = 0, sz = 0, temp, len = 0;
 311    char *buf = 0, *sec = "security.selinux";
 312
 313    for (;;) {
 314      // First time get length, second time read data into prepared buffer
 315      len = (S_ISLNK(st->st_mode) ? xattr_lget : xattr_get)
 316        (name, sec, buf+start, sz);
 317
 318      // Handle data or error
 319      if (len>999999 || (sz && len>sz)) len = -1, errno = E2BIG;
 320      if (buf || len<1) {
 321        if (len>0) {
 322          strcpy(buf+start+sz, "\n");
 323          write_prefix_block(buf, start+sz+2, 'x');
 324        } else if (errno==ENODATA || errno==ENOTSUP) len = 0;
 325        if (len) perror_msg("getfilecon %s", name);
 326
 327        free(buf);
 328        break;
 329      }
 330
 331      // Allocate buffer. Length includes prefix: calculate twice (wrap 99->100)
 332      temp = snprintf(0, 0, "%d", sz = (start = 22)+len+1);
 333      start += temp + (temp != snprintf(0, 0, "%d", temp+sz));
 334      buf = xmprintf("%u RHT.%s=%.*s", start+len+1, sec, sz = len, "");
 335    }
 336  }
 337
 338  maybe_prefix_block(hname, sizeof(hdr.name), 'L');
 339  if (!FLAG(numeric_owner)) {
 340    if ((TT.owner || (pw = bufgetpwuid(st->st_uid))) &&
 341        ascii_fits(st->st_uid, sizeof(hdr.uid)))
 342      strncpy(hdr.uname, TT.owner ? : pw->pw_name, sizeof(hdr.uname));
 343    if ((TT.group || (gr = bufgetgrgid(st->st_gid))) &&
 344        ascii_fits(st->st_gid, sizeof(hdr.gid)))
 345      strncpy(hdr.gname, TT.group ? : gr->gr_name, sizeof(hdr.gname));
 346  }
 347
 348  TT.sparselen = 0;
 349  if (hdr.type == '0') {
 350    // Before we write the header, make sure we can read the file
 351    if ((fd = open(name, O_RDONLY)) < 0) {
 352      perror_msg("can't open '%s'", name);
 353      free(name);
 354
 355      return 0;
 356    }
 357    if (FLAG(S)) {
 358      long long lo, ld = 0, len = 0;
 359
 360      // Enumerate the extents
 361      while ((lo = lseek(fd, ld, SEEK_HOLE)) != -1) {
 362        if (!(TT.sparselen&511))
 363          TT.sparse = xrealloc(TT.sparse, (TT.sparselen+514)*sizeof(long long));
 364        if (ld != lo) {
 365          TT.sparse[TT.sparselen++] = ld;
 366          len += TT.sparse[TT.sparselen++] = lo-ld;
 367        }
 368        if (lo == st->st_size || (ld = lseek(fd, lo, SEEK_DATA)) < lo) break;
 369      }
 370
 371      // If there were extents, change type to S record
 372      if (TT.sparselen>2) {
 373        TT.sparse[TT.sparselen++] = st->st_size;
 374        TT.sparse[TT.sparselen++] = 0;
 375        hdr.type = 'S';
 376        lnk = (char *)&hdr;
 377        for (i = 0; i<TT.sparselen && i<8; i++)
 378          itoo(lnk+386+12*i, 12, TT.sparse[i]);
 379
 380        // Record if there's overflow records, change length to sparse length,
 381        // record apparent length
 382        if (TT.sparselen>8) lnk[482] = 1;
 383        itoo(lnk+483, 12, st->st_size);
 384        ITOO(hdr.size, len);
 385      } else TT.sparselen = 0;
 386      lseek(fd, 0, SEEK_SET);
 387    }
 388  }
 389
 390  itoo(hdr.chksum, sizeof(hdr.chksum)-1, tar_cksum(&hdr));
 391  hdr.chksum[7] = ' ';
 392
 393  if (FLAG(v)) dprintf(1+(TT.fd==1), "%s\n", hname);
 394
 395  // Write header and data to archive
 396  xwrite(TT.fd, &hdr, 512);
 397  if (TT.sparselen>8) {
 398    char buf[512];
 399
 400    // write extent overflow blocks
 401    for (i=8;;i++) {
 402      int j = (i-8)%42;
 403
 404      if (!j || i==TT.sparselen) {
 405        if (i!=8) {
 406          if (i!=TT.sparselen) buf[504] = 1;
 407          xwrite(TT.fd, buf, 512);
 408        }
 409        if (i==TT.sparselen) break;
 410        memset(buf, 0, sizeof(buf));
 411      }
 412      itoo(buf+12*j, 12, TT.sparse[i]);
 413    }
 414  }
 415  TT.sparselen >>= 1;
 416  if (hdr.type == '0' || hdr.type == 'S') {
 417    if (hdr.type == '0') xsendfile_pad(fd, TT.fd, st->st_size);
 418    else for (i = 0; i<TT.sparselen; i++) {
 419      if (TT.sparse[i*2] != lseek(fd, TT.sparse[i*2], SEEK_SET))
 420        perror_msg("%s: seek %lld", name, TT.sparse[i*2]);
 421      xsendfile_pad(fd, TT.fd, TT.sparse[i*2+1]);
 422    }
 423    if (st->st_size%512) writeall(TT.fd, toybuf, (512-(st->st_size%512)));
 424    close(fd);
 425  }
 426  recurse = !FLAG(no_recursion);
 427
 428done:
 429  free(xfname);
 430  free(name);
 431
 432  return recurse*(DIRTREE_RECURSE|(FLAG(h)?DIRTREE_SYMFOLLOW:0));
 433}
 434
 435static void wsettime(char *s, long long sec)
 436{
 437  struct timespec times[2] = {{sec, 0},{sec, 0}};
 438
 439  if (utimensat(AT_FDCWD, s, times, AT_SYMLINK_NOFOLLOW))
 440    perror_msg("settime %lld %s", sec, s);
 441}
 442
 443// Do pending directory utimes(), NULL to flush all.
 444static int dirflush(char *name, int isdir)
 445{
 446  char *s = 0, *ss;
 447
 448  // Barf if name not in TT.cwd
 449  if (name) {
 450    if (!(ss = s = xabspath(name, isdir ? ABS_LAST : 0))) {
 451      error_msg("'%s' bad symlink", name);
 452
 453      return 1;
 454    }
 455    if (TT.cwd[1] && (!strstart(&ss, TT.cwd) || (*ss && *ss!='/'))) {
 456      error_msg("'%s' %s not under '%s'", name, s, TT.cwd);
 457      free(s);
 458
 459      return 1;
 460    }
 461
 462    // --restrict means first entry extracted is what everything must be under
 463    if (FLAG(restrict)) {
 464      free(TT.cwd);
 465      TT.cwd = strdup(s);
 466      toys.optflags ^= FLAG_restrict;
 467    }
 468    // use resolved name so trailing / is stripped
 469    if (isdir) unlink(s);
 470  }
 471
 472  // Set deferred utimes() for directories this file isn't under.
 473  // (Files must be depth-first ordered in tarball for this to matter.)
 474  while (TT.dirs) {
 475
 476    // If next file is under (or equal to) this dir, keep waiting
 477    if (name && strstart(&ss, ss = s) && (!*ss || *ss=='/')) break;
 478
 479    wsettime(TT.dirs->str+sizeof(long long), *(long long *)TT.dirs->str);
 480    free(llist_pop(&TT.dirs));
 481  }
 482  free(s);
 483
 484  // name was under TT.cwd
 485  return 0;
 486}
 487
 488// write data to file
 489static void sendfile_sparse(int fd)
 490{
 491  long long len, used = 0, sent;
 492  int i = 0, j;
 493
 494  do {
 495    if (TT.sparselen) {
 496      // Seek past holes or fill output with zeroes.
 497      if (-1 == lseek(fd, len = TT.sparse[i*2], SEEK_SET)) {
 498        sent = 0;
 499        while (len) {
 500          // first/last 512 bytes used, rest left zeroes
 501          j = (len>3072) ? 3072 : len;
 502          if (j != writeall(fd, toybuf+512, j)) goto error;
 503          len -= j;
 504        }
 505      } else {
 506        sent = len;
 507        if (!(len = TT.sparse[i*2+1]) && ftruncate(fd, sent+len))
 508          perror_msg("ftruncate");
 509      }
 510      if (len+used>TT.hdr.size) error_exit("sparse overflow");
 511    } else len = TT.hdr.size;
 512
 513    len -= sendfile_len(TT.fd, fd, len, &sent);
 514    used += sent;
 515    if (len) {
 516error:
 517      if (fd!=1) perror_msg(0);
 518      skippy(TT.hdr.size-used);
 519
 520      break;
 521    }
 522  } while (++i<TT.sparselen);
 523
 524  close(fd);
 525}
 526
 527static void extract_to_disk(char *name)
 528{
 529  int ala = TT.hdr.mode;
 530
 531  if (dirflush(name, S_ISDIR(ala))) {
 532    if (S_ISREG(ala) && !TT.hdr.link_target) skippy(TT.hdr.size);
 533 
 534    return;
 535  }
 536
 537  // create path before file if necessary
 538  if (strrchr(name, '/') && mkpath(name) && errno!=EEXIST)
 539      return perror_msg(":%s: can't mkdir", name);
 540
 541  // remove old file, if exists
 542  if (!FLAG(k) && !S_ISDIR(ala) && rmdir(name) && errno!=ENOENT && unlink(name))
 543    return perror_msg("can't remove: %s", name);
 544
 545  if (S_ISREG(ala)) {
 546    // hardlink?
 547    if (TT.hdr.link_target) {
 548      if (link(TT.hdr.link_target, name))
 549        return perror_msg("can't link '%s' -> '%s'", name, TT.hdr.link_target);
 550    // write contents
 551    } else {
 552      int fd = WARN_ONLY|O_WRONLY|O_CREAT|(FLAG(overwrite) ? O_TRUNC : O_EXCL);
 553
 554      if ((fd = xcreate(name, fd, ala&07777)) != -1) sendfile_sparse(fd);
 555      else return skippy(TT.hdr.size);
 556    }
 557  } else if (S_ISDIR(ala)) {
 558    if ((mkdir(name, 0700) == -1) && errno != EEXIST)
 559      return perror_msg("%s: can't create", name);
 560  } else if (S_ISLNK(ala)) {
 561    if (symlink(TT.hdr.link_target, name))
 562      return perror_msg("can't link '%s' -> '%s'", name, TT.hdr.link_target);
 563  } else if (mknod(name, ala, TT.hdr.device))
 564    return perror_msg("can't create '%s'", name);
 565
 566  // Set ownership
 567  if (!FLAG(o) && !geteuid()) {
 568    int u = TT.hdr.uid, g = TT.hdr.gid;
 569
 570    if (TT.owner) TT.hdr.uid = TT.ouid;
 571    else if (!FLAG(numeric_owner) && *TT.hdr.uname) {
 572      struct passwd *pw = bufgetpwnamuid(TT.hdr.uname, 0);
 573      if (pw) TT.hdr.uid = pw->pw_uid;
 574    }
 575
 576    if (TT.group) TT.hdr.gid = TT.ggid;
 577    else if (!FLAG(numeric_owner) && *TT.hdr.uname) {
 578      struct group *gr = bufgetgrnamgid(TT.hdr.gname, 0);
 579      if (gr) TT.hdr.gid = gr->gr_gid;
 580    }
 581
 582    if (lchown(name, u, g)) perror_msg("chown %d:%d '%s'", u, g, name);;
 583  }
 584
 585  if (!S_ISLNK(ala)) chmod(name, FLAG(p) ? ala : ala&0777);
 586
 587  // Apply mtime.
 588  if (!FLAG(m)) {
 589    if (S_ISDIR(ala)) {
 590      struct string_list *sl;
 591
 592      // Writing files into a directory changes directory timestamps, so
 593      // defer mtime updates until contents written.
 594
 595      sl = xmalloc(sizeof(struct string_list)+sizeof(long long)+strlen(name)+1);
 596      *(long long *)sl->str = TT.hdr.mtime;
 597      strcpy(sl->str+sizeof(long long), name);
 598      sl->next = TT.dirs;
 599      TT.dirs = sl;
 600    } else wsettime(name, TT.hdr.mtime);
 601  }
 602}
 603
 604static void unpack_tar(char *first)
 605{
 606  struct double_list *walk, *delete;
 607  struct tar_hdr tar;
 608  int i, sefd = -1, and = 0;
 609  unsigned maj, min;
 610  char *s, *name;
 611
 612  for (;;) {
 613    if (first) {
 614      memcpy(&tar, first, i = 512);
 615      first = 0;
 616    } else {
 617      // align to next block and read it
 618      if (TT.hdr.size%512) skippy(512-TT.hdr.size%512);
 619      i = readall(TT.fd, &tar, 512);
 620    }
 621
 622    if (i && i!=512) error_exit("short header");
 623
 624    // Two consecutive empty headers ends tar even if there's more data
 625    if (!i || !*tar.name) {
 626      if (!i || and++) return;
 627      TT.hdr.size = 0;
 628      continue;
 629    }
 630    // ensure null temination even of pathological packets
 631    tar.padd[0] = and = 0;
 632
 633    // Is this a valid TAR header?
 634    if (!is_tar_header(&tar)) error_exit("bad header");
 635    TT.hdr.size = OTOI(tar.size);
 636
 637    // If this header isn't writing something to the filesystem
 638    if ((tar.type<'0' || tar.type>'7') && tar.type!='S'
 639        && (*tar.magic && tar.type))
 640    {
 641      // Skip to next record if unknown type or payload > 1 megabyte
 642      if (!strchr("KLx", tar.type) || TT.hdr.size>1<<20) skippy(TT.hdr.size);
 643      // Read link or long name
 644      else if (tar.type != 'x')
 645        alloread(tar.type=='K'?&TT.hdr.link_target:&TT.hdr.name, TT.hdr.size);
 646      // Loop through 'x' payload records in "LEN NAME=VALUE\n" format
 647      else {
 648        char *p, *pp, *buf = 0;
 649        unsigned i, len, n;
 650
 651        alloread(&buf, TT.hdr.size);
 652        for (p = buf; (p-buf)<TT.hdr.size; p += len) {
 653          i = TT.hdr.size-(p-buf);
 654          if (1!=sscanf(p, "%u %n", &len, &n) || len<n+4 || len>i || n>i) {
 655            error_msg("bad header");
 656            break;
 657          }
 658          p[len-1] = 0;
 659          pp = p+n;
 660          // Ignore "RHT." prefix, if any.
 661          strstart(&pp, "RHT.");
 662          if ((FLAG(selinux) && !(FLAG(t)|FLAG(O)))
 663              && strstart(&pp, "security.selinux="))
 664          {
 665            i = strlen(pp);
 666            sefd = xopen("/proc/self/attr/fscreate", O_WRONLY|WARN_ONLY);
 667            if (sefd==-1 ||  i!=write(sefd, pp, i))
 668              perror_msg("setfscreatecon %s", pp);
 669          } else if (strstart(&pp, "path=")) {
 670            free(TT.hdr.name);
 671            TT.hdr.name = xstrdup(pp);
 672            break;
 673          }
 674        }
 675        free(buf);
 676      }
 677
 678      continue;
 679    }
 680
 681    // Handle sparse file type
 682    TT.sparselen = 0;
 683    if (tar.type == 'S') {
 684      char sparse[512];
 685      int max = 8;
 686
 687      // Load 4 pairs of offset/len from S block, plus 21 pairs from each
 688      // continuation block, list says where to seek/write sparse file contents
 689      s = 386+(char *)&tar;
 690      *sparse = i = 0;
 691
 692      for (;;) {
 693        if (!(TT.sparselen&511))
 694          TT.sparse = xrealloc(TT.sparse, (TT.sparselen+512)*sizeof(long long));
 695
 696        // If out of data in block check continue flag, stop or load next block
 697        if (++i>max || !*s) {
 698          if (!(*sparse ? sparse[504] : ((char *)&tar)[482])) break;
 699          xreadall(TT.fd, s = sparse, 512);
 700          max = 41;
 701          i = 0;
 702        }
 703        // Load next entry
 704        TT.sparse[TT.sparselen++] = otoi(s, 12);
 705        s += 12;
 706      }
 707
 708      // Odd number of entries (from corrupted tar) would be dropped here
 709      TT.sparselen /= 2;
 710      if (TT.sparselen)
 711        TT.hdr.ssize = TT.sparse[2*TT.sparselen-1]+TT.sparse[2*TT.sparselen-2];
 712    } else TT.hdr.ssize = TT.hdr.size;
 713
 714    // At this point, we have something to output. Convert metadata.
 715    TT.hdr.mode = OTOI(tar.mode)&0xfff;
 716    if (tar.type == 'S' || !tar.type) TT.hdr.mode |= 0x8000;
 717    else TT.hdr.mode |= (char []){8,8,10,2,6,4,1,8}[tar.type-'0']<<12;
 718    TT.hdr.uid = OTOI(tar.uid);
 719    TT.hdr.gid = OTOI(tar.gid);
 720    TT.hdr.mtime = OTOI(tar.mtime);
 721    maj = OTOI(tar.major);
 722    min = OTOI(tar.minor);
 723    TT.hdr.device = dev_makedev(maj, min);
 724    TT.hdr.uname = xstrndup(TT.owner ? : tar.uname, sizeof(tar.uname));
 725    TT.hdr.gname = xstrndup(TT.group ? : tar.gname, sizeof(tar.gname));
 726
 727    if (TT.owner) TT.hdr.uid = TT.ouid;
 728    else if (!FLAG(numeric_owner)) {
 729      struct passwd *pw = bufgetpwnamuid(TT.hdr.uname, 0);
 730      if (pw && (TT.owner || !FLAG(numeric_owner))) TT.hdr.uid = pw->pw_uid;
 731    }
 732
 733    if (TT.group) TT.hdr.gid = TT.ggid;
 734    else if (!FLAG(numeric_owner)) {
 735      struct group *gr = bufgetgrnamgid(TT.hdr.gname, 0);
 736      if (gr) TT.hdr.gid = gr->gr_gid;
 737    }
 738
 739    if (!TT.hdr.link_target && *tar.link)
 740      TT.hdr.link_target = xstrndup(tar.link, sizeof(tar.link));
 741    if (!TT.hdr.name) {
 742      // Glue prefix and name fields together with / if necessary
 743      i = (tar.type=='S') ? 0 : strnlen(tar.prefix, sizeof(tar.prefix));
 744      TT.hdr.name = xmprintf("%.*s%s%.*s", i, tar.prefix,
 745        (i && tar.prefix[i-1] != '/') ? "/" : "",
 746        (int)sizeof(tar.name), tar.name);
 747    }
 748
 749    // Old broken tar recorded dir as "file with trailing slash"
 750    if (S_ISREG(TT.hdr.mode) && (s = strend(TT.hdr.name, "/"))) {
 751      *s = 0;
 752      TT.hdr.mode = (TT.hdr.mode & ~S_IFMT) | S_IFDIR;
 753    }
 754
 755    // Non-regular files don't have contents stored in archive.
 756    if ((TT.hdr.link_target && *TT.hdr.link_target)
 757      || (tar.type && !S_ISREG(TT.hdr.mode)))
 758        TT.hdr.size = 0;
 759
 760    // Files are seen even if excluded, so check them here.
 761    // TT.seen points to first seen entry in TT.incl, or NULL if none yet.
 762
 763    if ((delete = filter(TT.incl, TT.hdr.name)) && TT.incl != TT.seen) {
 764      if (!TT.seen) TT.seen = delete;
 765
 766      // Move seen entry to end of list.
 767      if (TT.incl == delete) TT.incl = TT.incl->next;
 768      else for (walk = TT.incl; walk != TT.seen; walk = walk->next) {
 769        if (walk == delete) {
 770          dlist_pop(&walk);
 771          dlist_add_nomalloc(&TT.incl, delete);
 772        }
 773      }
 774    }
 775
 776    // Skip excluded files, filtering on the untransformed name.
 777    if (filter(TT.excl, name = TT.hdr.name) || (TT.incl && !delete)) {
 778      skippy(TT.hdr.size);
 779      goto done;
 780    }
 781
 782    // We accept --show-transformed but always do, so it's a NOP.
 783    name = TT.hdr.name;
 784    if (TT.xfsed) {
 785      if (!(name = xrunread(TT.xfsed, name))) error_exit("bad xform");
 786      free(TT.hdr.name);
 787      TT.hdr.name = name;
 788    }
 789
 790    for (i = 0; i<TT.strip; i++) {
 791      char *s = strchr(name, '/');
 792
 793      if (s && s[1]) name = s+1;
 794      else {
 795        if (S_ISDIR(TT.hdr.mode)) *name = 0;
 796        break;
 797      }
 798    }
 799
 800    if (!*name) skippy(TT.hdr.size);
 801    else if (FLAG(t)) {
 802      if (FLAG(v)) {
 803        struct tm *lc = localtime(TT.mtime ? &TT.mtt : &TT.hdr.mtime);
 804        char perm[12], gname[12];
 805
 806        mode_to_string(TT.hdr.mode, perm);
 807        printf("%s", perm);
 808        sprintf(perm, "%u", TT.hdr.uid);
 809        sprintf(gname, "%u", TT.hdr.gid);
 810        printf(" %s/%s ", *TT.hdr.uname ? TT.hdr.uname : perm,
 811          *TT.hdr.gname ? TT.hdr.gname : gname);
 812        if (tar.type=='3' || tar.type=='4') printf("%u,%u", maj, min);
 813        else printf("%9lld", TT.hdr.ssize);
 814        sprintf(perm, ":%02d", lc->tm_sec);
 815        printf("  %d-%02d-%02d %02d:%02d%s ", 1900+lc->tm_year, 1+lc->tm_mon,
 816          lc->tm_mday, lc->tm_hour, lc->tm_min, FLAG(full_time) ? perm : "");
 817      }
 818      printf("%s", name);
 819      if (TT.hdr.link_target) printf(" -> %s", TT.hdr.link_target);
 820      xputc('\n');
 821      skippy(TT.hdr.size);
 822    } else {
 823      if (FLAG(v)) printf("%s\n", name);
 824      if (FLAG(O)) sendfile_sparse(1);
 825      else if (FLAG(to_command)) {
 826        if (S_ISREG(TT.hdr.mode)) {
 827          int fd, pid;
 828
 829          xsetenv("TAR_FILETYPE", "f");
 830          xsetenv(xmprintf("TAR_MODE=%o", TT.hdr.mode), 0);
 831          xsetenv(xmprintf("TAR_SIZE=%lld", TT.hdr.ssize), 0);
 832          xsetenv("TAR_FILENAME", name);
 833          xsetenv("TAR_UNAME", TT.hdr.uname);
 834          xsetenv("TAR_GNAME", TT.hdr.gname);
 835          xsetenv(xmprintf("TAR_MTIME=%llo", (long long)TT.hdr.mtime), 0);
 836          xsetenv(xmprintf("TAR_UID=%o", TT.hdr.uid), 0);
 837          xsetenv(xmprintf("TAR_GID=%o", TT.hdr.gid), 0);
 838
 839          pid = xpopen((char *[]){"sh", "-c", TT.to_command, NULL}, &fd, 0);
 840          // todo: short write exits tar here, other skips data.
 841          sendfile_sparse(fd);
 842          fd = xpclose_both(pid, 0);
 843          if (fd) error_msg("%d: Child returned %d", pid, fd);
 844        }
 845      } else extract_to_disk(name);
 846    }
 847
 848done:
 849    if (sefd != -1) {
 850      // zero length write resets fscreate context to default
 851      (void)write(sefd, 0, 0);
 852      close(sefd);
 853      sefd = -1;
 854    }
 855    free(TT.hdr.name);
 856    free(TT.hdr.link_target);
 857    free(TT.hdr.uname);
 858    free(TT.hdr.gname);
 859    TT.hdr.name = TT.hdr.link_target = 0;
 860  }
 861}
 862
 863// Add copy of filename (minus trailing \n and /) to dlist **
 864static void trim2list(void *list, char *pline)
 865{
 866  char *n = xstrdup(pline);
 867  int i = strlen(n);
 868
 869  dlist_add(list, n);
 870  if (i && n[i-1]=='\n') i--;
 871  while (i && n[i-1] == '/') i--;
 872  n[i] = 0;
 873}
 874
 875// do_lines callback, selects TT.incl or TT.excl based on call order
 876static void do_XT(char **pline, long len)
 877{
 878  if (pline) trim2list(TT.X ? &TT.excl : &TT.incl, *pline);
 879}
 880
 881void tar_main(void)
 882{
 883  char *s, **args = toys.optargs,
 884    *archiver = FLAG(I) ? TT.I : (FLAG(z) ? "gzip" : (FLAG(J) ? "xz":"bzip2"));
 885  int len = 0, ii;
 886
 887  // Needed when extracting to command
 888  signal(SIGPIPE, SIG_IGN);
 889
 890  // Get possible early errors out of the way
 891  if (!geteuid()) toys.optflags |= FLAG_p;
 892  if (TT.owner) {
 893    if (!(s = strchr(TT.owner, ':'))) TT.ouid = xgetuid(TT.owner);
 894    else {
 895      TT.owner = xstrndup(TT.owner, s++-TT.owner);
 896      TT.ouid = atolx_range(s, 0, INT_MAX);
 897    }
 898  }
 899  if (TT.group) {
 900    if (!(s = strchr(TT.group, ':'))) TT.ggid = xgetgid(TT.group);
 901    else {
 902      TT.group = xstrndup(TT.group, s++-TT.group);
 903      TT.ggid = atolx_range(s, 0, INT_MAX);
 904    }
 905  }
 906  if (TT.mtime) xparsedate(TT.mtime, &TT.mtt, (void *)&s, 1);
 907
 908  // Collect file list.
 909  for (; TT.exclude; TT.exclude = TT.exclude->next)
 910    trim2list(&TT.excl, TT.exclude->arg);
 911  for (;TT.X; TT.X = TT.X->next) do_lines(xopenro(TT.X->arg), '\n', do_XT);
 912  for (args = toys.optargs; *args; args++) trim2list(&TT.incl, *args);
 913  for (;TT.T; TT.T = TT.T->next)
 914    do_lines(xopenro(TT.T->arg), FLAG(null) ? '\0' : '\n', do_XT);
 915
 916  // If include file list empty, don't create empty archive
 917  if (FLAG(c)) {
 918    if (!TT.incl) error_exit("empty archive");
 919    TT.fd = 1;
 920  }
 921
 922  if (TT.xform) {
 923    struct arg_list *al;
 924
 925    for (ii = 0, al = TT.xform; al; al = al->next) ii++;
 926    TT.xfsed = xmalloc((ii+1)*2*sizeof(char *));
 927    TT.xfsed[0] = "sed";
 928    for (ii = 1, al = TT.xform; al; al = al->next) {
 929      TT.xfsed[ii++] = "-e";
 930      TT.xfsed[ii++] = al->arg;
 931    }
 932    TT.xfsed[ii] = 0;
 933  }
 934
 935  // nommu reentry for nonseekable input skips this, parent did it for us
 936  if (toys.stacktop) {
 937    if (TT.f && strcmp(TT.f, "-"))
 938      TT.fd = xcreate(TT.f, TT.fd*(O_WRONLY|O_CREAT|O_TRUNC), 0666);
 939    // Get destination directory
 940    if (TT.C) xchdir(TT.C);
 941  }
 942
 943  // Get destination directory
 944  TT.cwd = xabspath(s = xgetcwd(), ABS_PATH);
 945  free(s);
 946
 947  // Remember archive inode so we don't overwrite it or add it to itself
 948  {
 949    struct stat st;
 950
 951    if (!fstat(TT.fd, &st)) {
 952      TT.archive_di.ino = st.st_ino;
 953      TT.archive_di.dev = st.st_dev;
 954    }
 955  }
 956
 957  // Are we reading?
 958  if (FLAG(x)||FLAG(t)) {
 959    char *hdr = 0;
 960
 961    // autodetect compression type when not specified
 962    if (!(FLAG(j)||FLAG(z)||FLAG(I)||FLAG(J))) {
 963      len = xread(TT.fd, hdr = toybuf+sizeof(toybuf)-512, 512);
 964      if (len!=512 || !is_tar_header(hdr)) {
 965        // detect gzip and bzip signatures
 966        if (SWAP_BE16(*(short *)hdr)==0x1f8b) toys.optflags |= FLAG_z;
 967        else if (!memcmp(hdr, "BZh", 3)) toys.optflags |= FLAG_j;
 968        else if (peek_be(hdr, 7) == 0xfd377a585a0000UL) toys.optflags |= FLAG_J;
 969        else error_exit("Not tar");
 970
 971        // if we can seek back we don't need to loop and copy data
 972        if (!lseek(TT.fd, -len, SEEK_CUR)) hdr = 0;
 973      }
 974    }
 975
 976    if (FLAG(j)||FLAG(z)||FLAG(I)||FLAG(J)) {
 977      int pipefd[2] = {hdr ? -1 : TT.fd, -1}, i, pid;
 978      struct string_list *zcat = FLAG(I) ? 0 : find_in_path(getenv("PATH"),
 979        FLAG(j) ? "bzcat" : FLAG(J) ? "xzcat" : "zcat");
 980
 981      // Toybox provides more decompressors than compressors, so try them first
 982      TT.pid = xpopen_both(zcat ? (char *[]){zcat->str, 0} :
 983        (char *[]){archiver, "-d", 0}, pipefd);
 984      if (CFG_TOYBOX_FREE) llist_traverse(zcat, free);
 985
 986      if (!hdr) {
 987        // If we could seek, child gzip inherited fd and we read its output
 988        close(TT.fd);
 989        TT.fd = pipefd[1];
 990
 991      } else {
 992
 993        // If we autodetected type but then couldn't lseek to put the data back
 994        // we have to loop reading data from TT.fd and pass it to gzip ourselves
 995        // (starting with the block of data we read to autodetect).
 996
 997        // dirty trick: move gzip input pipe to stdin so child closes spare copy
 998        dup2(pipefd[0], 0);
 999        if (pipefd[0]) close(pipefd[0]);
1000
1001        // Fork a copy of ourselves to handle extraction (reads from zip output
1002        // pipe, writes to stdout).
1003        pipefd[0] = pipefd[1];
1004        pipefd[1] = 1;
1005        pid = xpopen_both(0, pipefd);
1006        close(pipefd[1]);
1007
1008        // loop writing collated data to zip proc
1009        xwrite(0, hdr, len);
1010        for (;;) {
1011          if ((i = read(TT.fd, toybuf, sizeof(toybuf)))<1) {
1012            close(0);
1013            xwaitpid(pid);
1014            return;
1015          }
1016          xwrite(0, toybuf, i);
1017        }
1018      }
1019    }
1020
1021    unpack_tar(hdr);
1022    dirflush(0, 0);
1023    // Shut up archiver about inability to write all trailing NULs to pipe buf
1024    if (TT.pid>0) kill(TT.pid, 9);
1025
1026    // Each time a TT.incl entry is seen it's moved to the end of the list,
1027    // with TT.seen pointing to first seen list entry. Anything between
1028    // TT.incl and TT.seen wasn't encountered in archive..
1029    if (TT.seen != TT.incl) {
1030      if (!TT.seen) TT.seen = TT.incl;
1031      while (TT.incl != TT.seen) {
1032        error_msg("'%s' not in archive", TT.incl->data);
1033        TT.incl = TT.incl->next;
1034      }
1035    }
1036
1037  // are we writing? (Don't have to test flag here, one of 3 must be set)
1038  } else {
1039    struct double_list *dl = TT.incl;
1040
1041    // autodetect compression type based on -f name. (Use > to avoid.)
1042    if (TT.f && !FLAG(j) && !FLAG(z) && !FLAG(I) && !FLAG(J)) {
1043      char *tbz[] = {".tbz", ".tbz2", ".tar.bz", ".tar.bz2"};
1044      if (strend(TT.f, ".tgz") || strend(TT.f, ".tar.gz"))
1045        toys.optflags |= FLAG_z;
1046      if (strend(TT.f, ".txz") || strend(TT.f, ".tar.xz"))
1047        toys.optflags |= FLAG_J;
1048      else for (len = 0; len<ARRAY_LEN(tbz); len++)
1049        if (strend(TT.f, tbz[len])) toys.optflags |= FLAG_j;
1050    }
1051
1052    if (FLAG(j)||FLAG(z)||FLAG(I)||FLAG(J)) {
1053      int pipefd[2] = {-1, TT.fd};
1054
1055      xpopen_both((char *[]){archiver, 0}, pipefd);
1056      close(TT.fd);
1057      TT.fd = pipefd[0];
1058    }
1059    do {
1060      TT.warn = 1;
1061      dirtree_flagread(dl->data, FLAG(h) ? DIRTREE_SYMFOLLOW : 0, add_to_tar);
1062    } while (TT.incl != (dl = dl->next));
1063
1064    writeall(TT.fd, toybuf, 1024);
1065  }
1066  if (toys.exitval) error_msg("had errors");
1067
1068  if (CFG_TOYBOX_FREE) {
1069    llist_traverse(TT.excl, llist_free_double);
1070    llist_traverse(TT.incl, llist_free_double);
1071    while(TT.hlc) free(TT.hlx[--TT.hlc].arg);
1072    free(TT.hlx);
1073    free(TT.cwd);
1074    close(TT.fd);
1075  }
1076}
1077