busybox/archival/libunarchive/get_header_tar.c
<<
>>
Prefs
   1/* vi: set sw=4 ts=4: */
   2/* Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
   3 *
   4 *  FIXME:
   5 *    In privileged mode if uname and gname map to a uid and gid then use the
   6 *    mapped value instead of the uid/gid values in tar header
   7 *
   8 *  References:
   9 *    GNU tar and star man pages,
  10 *    Opengroup's ustar interchange format,
  11 *      http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html
  12 */
  13
  14#include "libbb.h"
  15#include "unarchive.h"
  16
  17/*
  18 * GNU tar uses "base-256 encoding" for very large numbers (>8 billion).
  19 * Encoding is binary, with highest bit always set as a marker
  20 * and sign in next-highest bit:
  21 * 80 00 .. 00 - zero
  22 * bf ff .. ff - largest positive number
  23 * ff ff .. ff - minus 1
  24 * c0 00 .. 00 - smallest negative number
  25 *
  26 * We expect it only in size field, where negative numbers don't make sense.
  27 */
  28static off_t getBase256_len12(const char *str)
  29{
  30        off_t value;
  31        int len;
  32
  33        /* if (*str & 0x40) error; - caller prevents this */
  34
  35        if (sizeof(off_t) >= 12) {
  36                /* Probably 128-bit (16 byte) off_t. Can be optimized. */
  37                len = 12;
  38                value = *str++ & 0x3f;
  39                while (--len)
  40                        value = (value << 8) + (unsigned char) *str++;
  41                return value;
  42        }
  43
  44#ifdef CHECK_FOR_OVERFLOW
  45        /* Can be optimized to eat 32-bit chunks */
  46        char c = *str++ & 0x3f;
  47        len = 12;
  48        while (1) {
  49                if (c)
  50                        bb_error_msg_and_die("overflow in base-256 encoded file size");
  51                if (--len == sizeof(off_t))
  52                        break;
  53                c = *str++;
  54        }
  55#else
  56        str += (12 - sizeof(off_t));
  57#endif
  58
  59/* Now str points to sizeof(off_t) least significant bytes.
  60 *
  61 * Example of tar file with 8914993153 (0x213600001) byte file.
  62 * Field starts at offset 7c:
  63 * 00070  30 30 30 00 30 30 30 30  30 30 30 00 80 00 00 00  |000.0000000.....|
  64 * 00080  00 00 00 02 13 60 00 01  31 31 31 32 30 33 33 36  |.....`..11120336|
  65 *
  66 * str is at offset 80 or 84 now (64-bit or 32-bit off_t).
  67 * We (ab)use the fact that value happens to be aligned,
  68 * and fetch it in one go:
  69 */
  70        if (sizeof(off_t) == 8) {
  71                value = *(off_t*)str;
  72                value = SWAP_BE64(value);
  73        } else if (sizeof(off_t) == 4) {
  74                value = *(off_t*)str;
  75                value = SWAP_BE32(value);
  76        } else {
  77                value = 0;
  78                len = sizeof(off_t);
  79                while (--len)
  80                        value = (value << 8) + (unsigned char) *str++;
  81        }
  82        return value;
  83}
  84
  85/* NB: _DESTROYS_ str[len] character! */
  86static unsigned long long getOctal(char *str, int len)
  87{
  88        unsigned long long v;
  89        /* NB: leading spaces are allowed. Using strtoull to handle that.
  90         * The downside is that we accept e.g. "-123" too :(
  91         */
  92        str[len] = '\0';
  93        v = strtoull(str, &str, 8);
  94        /* std: "Each numeric field is terminated by one or more
  95         * <space> or NUL characters". We must support ' '! */
  96        if (*str != '\0' && *str != ' ')
  97                bb_error_msg_and_die("corrupted octal value in tar header");
  98        return v;
  99}
 100#define GET_OCTAL(a) getOctal((a), sizeof(a))
 101
 102void BUG_tar_header_size(void);
 103char FAST_FUNC get_header_tar(archive_handle_t *archive_handle)
 104{
 105        file_header_t *file_header = archive_handle->file_header;
 106        struct {
 107                /* ustar header, Posix 1003.1 */
 108                char name[100];     /*   0-99 */
 109                char mode[8];       /* 100-107 */
 110                char uid[8];        /* 108-115 */
 111                char gid[8];        /* 116-123 */
 112                char size[12];      /* 124-135 */
 113                char mtime[12];     /* 136-147 */
 114                char chksum[8];     /* 148-155 */
 115                char typeflag;      /* 156-156 */
 116                char linkname[100]; /* 157-256 */
 117                /* POSIX:   "ustar" NUL "00" */
 118                /* GNU tar: "ustar  " NUL */
 119                /* Normally it's defined as magic[6] followed by
 120                 * version[2], but we put them together to simplify code
 121                 */
 122                char magic[8];      /* 257-264 */
 123                char uname[32];     /* 265-296 */
 124                char gname[32];     /* 297-328 */
 125                char devmajor[8];   /* 329-336 */
 126                char devminor[8];   /* 337-344 */
 127                char prefix[155];   /* 345-499 */
 128                char padding[12];   /* 500-512 */
 129        } tar;
 130        char *cp;
 131        int i, sum_u, sum;
 132#if ENABLE_FEATURE_TAR_OLDSUN_COMPATIBILITY
 133        int sum_s;
 134#endif
 135        int parse_names;
 136
 137        /* Our "private data" */
 138#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS
 139# define p_longname (archive_handle->tar__longname)
 140# define p_linkname (archive_handle->tar__linkname)
 141#else
 142# define p_longname 0
 143# define p_linkname 0
 144#endif
 145
 146        if (sizeof(tar) != 512)
 147                BUG_tar_header_size();
 148
 149#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS
 150 again:
 151#endif
 152        /* Align header */
 153        data_align(archive_handle, 512);
 154
 155 again_after_align:
 156
 157#if ENABLE_DESKTOP || ENABLE_FEATURE_TAR_AUTODETECT
 158        /* to prevent misdetection of bz2 sig */
 159        *(uint32_t*)(&tar) = 0;
 160        i = full_read(archive_handle->src_fd, &tar, 512);
 161        /* If GNU tar sees EOF in above read, it says:
 162         * "tar: A lone zero block at N", where N = kilobyte
 163         * where EOF was met (not EOF block, actual EOF!),
 164         * and exits with EXIT_SUCCESS.
 165         * We will mimic exit(EXIT_SUCCESS), although we will not mimic
 166         * the message and we don't check whether we indeed
 167         * saw zero block directly before this. */
 168        if (i == 0) {
 169                xfunc_error_retval = 0;
 170 short_read:
 171                bb_error_msg_and_die("short read");
 172        }
 173        if (i != 512) {
 174                IF_FEATURE_TAR_AUTODETECT(goto autodetect;)
 175                goto short_read;
 176        }
 177
 178#else
 179        i = 512;
 180        xread(archive_handle->src_fd, &tar, i);
 181#endif
 182        archive_handle->offset += i;
 183
 184        /* If there is no filename its an empty header */
 185        if (tar.name[0] == 0 && tar.prefix[0] == 0) {
 186                if (archive_handle->tar__end) {
 187                        /* Second consecutive empty header - end of archive.
 188                         * Read until the end to empty the pipe from gz or bz2
 189                         */
 190                        while (full_read(archive_handle->src_fd, &tar, 512) == 512)
 191                                continue;
 192                        return EXIT_FAILURE;
 193                }
 194                archive_handle->tar__end = 1;
 195                return EXIT_SUCCESS;
 196        }
 197        archive_handle->tar__end = 0;
 198
 199        /* Check header has valid magic, "ustar" is for the proper tar,
 200         * five NULs are for the old tar format  */
 201        if (strncmp(tar.magic, "ustar", 5) != 0
 202         && (!ENABLE_FEATURE_TAR_OLDGNU_COMPATIBILITY
 203             || memcmp(tar.magic, "\0\0\0\0", 5) != 0)
 204        ) {
 205#if ENABLE_FEATURE_TAR_AUTODETECT
 206                char FAST_FUNC (*get_header_ptr)(archive_handle_t *);
 207
 208 autodetect:
 209                /* tar gz/bz autodetect: check for gz/bz2 magic.
 210                 * If we see the magic, and it is the very first block,
 211                 * we can switch to get_header_tar_gz/bz2/lzma().
 212                 * Needs seekable fd. I wish recv(MSG_PEEK) works
 213                 * on any fd... */
 214#if ENABLE_FEATURE_SEAMLESS_GZ
 215                if (tar.name[0] == 0x1f && tar.name[1] == (char)0x8b) { /* gzip */
 216                        get_header_ptr = get_header_tar_gz;
 217                } else
 218#endif
 219#if ENABLE_FEATURE_SEAMLESS_BZ2
 220                if (tar.name[0] == 'B' && tar.name[1] == 'Z'
 221                 && tar.name[2] == 'h' && isdigit(tar.name[3])
 222                ) { /* bzip2 */
 223                        get_header_ptr = get_header_tar_bz2;
 224                } else
 225#endif
 226                        goto err;
 227                /* Two different causes for lseek() != 0:
 228                 * unseekable fd (would like to support that too, but...),
 229                 * or not first block (false positive, it's not .gz/.bz2!) */
 230                if (lseek(archive_handle->src_fd, -i, SEEK_CUR) != 0)
 231                        goto err;
 232                while (get_header_ptr(archive_handle) == EXIT_SUCCESS)
 233                        continue;
 234                return EXIT_FAILURE;
 235 err:
 236#endif /* FEATURE_TAR_AUTODETECT */
 237                bb_error_msg_and_die("invalid tar magic");
 238        }
 239
 240        /* Do checksum on headers.
 241         * POSIX says that checksum is done on unsigned bytes, but
 242         * Sun and HP-UX gets it wrong... more details in
 243         * GNU tar source. */
 244#if ENABLE_FEATURE_TAR_OLDSUN_COMPATIBILITY
 245        sum_s = ' ' * sizeof(tar.chksum);
 246#endif
 247        sum_u = ' ' * sizeof(tar.chksum);
 248        for (i = 0; i < 148; i++) {
 249                sum_u += ((unsigned char*)&tar)[i];
 250#if ENABLE_FEATURE_TAR_OLDSUN_COMPATIBILITY
 251                sum_s += ((signed char*)&tar)[i];
 252#endif
 253        }
 254        for (i = 156; i < 512; i++) {
 255                sum_u += ((unsigned char*)&tar)[i];
 256#if ENABLE_FEATURE_TAR_OLDSUN_COMPATIBILITY
 257                sum_s += ((signed char*)&tar)[i];
 258#endif
 259        }
 260        /* This field does not need special treatment (getOctal) */
 261        {
 262                char *endp; /* gcc likes temp var for &endp */
 263                sum = strtoul(tar.chksum, &endp, 8);
 264                if ((*endp != '\0' && *endp != ' ')
 265                 || (sum_u != sum IF_FEATURE_TAR_OLDSUN_COMPATIBILITY(&& sum_s != sum))
 266                ) {
 267                        bb_error_msg_and_die("invalid tar header checksum");
 268                }
 269        }
 270        /* don't use xstrtoul, tar.chksum may have leading spaces */
 271        sum = strtoul(tar.chksum, NULL, 8);
 272        if (sum_u != sum IF_FEATURE_TAR_OLDSUN_COMPATIBILITY(&& sum_s != sum)) {
 273                bb_error_msg_and_die("invalid tar header checksum");
 274        }
 275
 276        /* 0 is reserved for high perf file, treat as normal file */
 277        if (!tar.typeflag) tar.typeflag = '0';
 278        parse_names = (tar.typeflag >= '0' && tar.typeflag <= '7');
 279
 280        /* getOctal trashes subsequent field, therefore we call it
 281         * on fields in reverse order */
 282        if (tar.devmajor[0]) {
 283                char t = tar.prefix[0];
 284                /* we trash prefix[0] here, but we DO need it later! */
 285                unsigned minor = GET_OCTAL(tar.devminor);
 286                unsigned major = GET_OCTAL(tar.devmajor);
 287                file_header->device = makedev(major, minor);
 288                tar.prefix[0] = t;
 289        }
 290        file_header->link_target = NULL;
 291        if (!p_linkname && parse_names && tar.linkname[0]) {
 292                file_header->link_target = xstrndup(tar.linkname, sizeof(tar.linkname));
 293                /* FIXME: what if we have non-link object with link_target? */
 294                /* Will link_target be free()ed? */
 295        }
 296#if ENABLE_FEATURE_TAR_UNAME_GNAME
 297        file_header->tar__uname = tar.uname[0] ? xstrndup(tar.uname, sizeof(tar.uname)) : NULL;
 298        file_header->tar__gname = tar.gname[0] ? xstrndup(tar.gname, sizeof(tar.gname)) : NULL;
 299#endif
 300        /* mtime: rudimentally handle GNU tar's "base256 encoding"
 301         * People report tarballs with NEGATIVE unix times encoded that way */
 302        file_header->mtime = (tar.mtime[0] & 0x80) /* base256? */
 303                        ? 0 /* bogus */
 304                        : GET_OCTAL(tar.mtime);
 305        /* size: handle GNU tar's "base256 encoding" */
 306        file_header->size = (tar.size[0] & 0xc0) == 0x80 /* positive base256? */
 307                        ? getBase256_len12(tar.size)
 308                        : GET_OCTAL(tar.size);
 309        file_header->gid = GET_OCTAL(tar.gid);
 310        file_header->uid = GET_OCTAL(tar.uid);
 311        /* Set bits 0-11 of the files mode */
 312        file_header->mode = 07777 & GET_OCTAL(tar.mode);
 313
 314        file_header->name = NULL;
 315        if (!p_longname && parse_names) {
 316                /* we trash mode[0] here, it's ok */
 317                //tar.name[sizeof(tar.name)] = '\0'; - gcc 4.3.0 would complain
 318                tar.mode[0] = '\0';
 319                if (tar.prefix[0]) {
 320                        /* and padding[0] */
 321                        //tar.prefix[sizeof(tar.prefix)] = '\0'; - gcc 4.3.0 would complain
 322                        tar.padding[0] = '\0';
 323                        file_header->name = concat_path_file(tar.prefix, tar.name);
 324                } else
 325                        file_header->name = xstrdup(tar.name);
 326        }
 327
 328        /* Set bits 12-15 of the files mode */
 329        /* (typeflag was not trashed because chksum does not use getOctal) */
 330        switch (tar.typeflag) {
 331        /* busybox identifies hard links as being regular files with 0 size and a link name */
 332        case '1':
 333                file_header->mode |= S_IFREG;
 334                break;
 335        case '7':
 336        /* case 0: */
 337        case '0':
 338#if ENABLE_FEATURE_TAR_OLDGNU_COMPATIBILITY
 339                if (last_char_is(file_header->name, '/')) {
 340                        goto set_dir;
 341                }
 342#endif
 343                file_header->mode |= S_IFREG;
 344                break;
 345        case '2':
 346                file_header->mode |= S_IFLNK;
 347                /* have seen tarballs with size field containing
 348                 * the size of the link target's name */
 349 size0:
 350                file_header->size = 0;
 351                break;
 352        case '3':
 353                file_header->mode |= S_IFCHR;
 354                goto size0; /* paranoia */
 355        case '4':
 356                file_header->mode |= S_IFBLK;
 357                goto size0;
 358        case '5':
 359 IF_FEATURE_TAR_OLDGNU_COMPATIBILITY(set_dir:)
 360                file_header->mode |= S_IFDIR;
 361                goto size0;
 362        case '6':
 363                file_header->mode |= S_IFIFO;
 364                goto size0;
 365#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS
 366        case 'L':
 367                /* free: paranoia: tar with several consecutive longnames */
 368                free(p_longname);
 369                /* For paranoia reasons we allocate extra NUL char */
 370                p_longname = xzalloc(file_header->size + 1);
 371                /* We read ASCIZ string, including NUL */
 372                xread(archive_handle->src_fd, p_longname, file_header->size);
 373                archive_handle->offset += file_header->size;
 374                /* return get_header_tar(archive_handle); */
 375                /* gcc 4.1.1 didn't optimize it into jump */
 376                /* so we will do it ourself, this also saves stack */
 377                goto again;
 378        case 'K':
 379                free(p_linkname);
 380                p_linkname = xzalloc(file_header->size + 1);
 381                xread(archive_handle->src_fd, p_linkname, file_header->size);
 382                archive_handle->offset += file_header->size;
 383                /* return get_header_tar(archive_handle); */
 384                goto again;
 385        case 'D':       /* GNU dump dir */
 386        case 'M':       /* Continuation of multi volume archive */
 387        case 'N':       /* Old GNU for names > 100 characters */
 388        case 'S':       /* Sparse file */
 389        case 'V':       /* Volume header */
 390#endif
 391        case 'g':       /* pax global header */
 392        case 'x': {     /* pax extended header */
 393                off_t sz;
 394                bb_error_msg("warning: skipping header '%c'", tar.typeflag);
 395                sz = (file_header->size + 511) & ~(off_t)511;
 396                archive_handle->offset += sz;
 397                sz >>= 9; /* sz /= 512 but w/o contortions for signed div */
 398                while (sz--)
 399                        xread(archive_handle->src_fd, &tar, 512);
 400                /* return get_header_tar(archive_handle); */
 401                goto again_after_align;
 402        }
 403        default:
 404                bb_error_msg_and_die("unknown typeflag: 0x%x", tar.typeflag);
 405        }
 406
 407#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS
 408        if (p_longname) {
 409                file_header->name = p_longname;
 410                p_longname = NULL;
 411        }
 412        if (p_linkname) {
 413                file_header->link_target = p_linkname;
 414                p_linkname = NULL;
 415        }
 416#endif
 417        if (strncmp(file_header->name, "/../"+1, 3) == 0
 418         || strstr(file_header->name, "/../")
 419        ) {
 420                bb_error_msg_and_die("name with '..' encountered: '%s'",
 421                                file_header->name);
 422        }
 423
 424        /* Strip trailing '/' in directories */
 425        /* Must be done after mode is set as '/' is used to check if it's a directory */
 426        cp = last_char_is(file_header->name, '/');
 427
 428        if (archive_handle->filter(archive_handle) == EXIT_SUCCESS) {
 429                archive_handle->action_header(/*archive_handle->*/ file_header);
 430                /* Note that we kill the '/' only after action_header() */
 431                /* (like GNU tar 1.15.1: verbose mode outputs "dir/dir/") */
 432                if (cp) *cp = '\0';
 433                archive_handle->ah_flags |= ARCHIVE_EXTRACT_QUIET;
 434                archive_handle->action_data(archive_handle);
 435                llist_add_to(&(archive_handle->passed), file_header->name);
 436        } else {
 437                data_skip(archive_handle);
 438                free(file_header->name);
 439        }
 440        archive_handle->offset += file_header->size;
 441
 442        free(file_header->link_target);
 443        /* Do not free(file_header->name)! (why?) */
 444#if ENABLE_FEATURE_TAR_UNAME_GNAME
 445        free(file_header->tar__uname);
 446        free(file_header->tar__gname);
 447#endif
 448        return EXIT_SUCCESS;
 449}
 450