busybox/coreutils/sort.c
<<
>>
Prefs
   1/* vi: set sw=4 ts=4: */
   2/*
   3 * SuS3 compliant sort implementation for busybox
   4 *
   5 * Copyright (C) 2004 by Rob Landley <rob@landley.net>
   6 *
   7 * MAINTAINER: Rob Landley <rob@landley.net>
   8 *
   9 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
  10 *
  11 * See SuS3 sort standard at:
  12 * http://www.opengroup.org/onlinepubs/007904975/utilities/sort.html
  13 */
  14
  15#include "libbb.h"
  16
  17/* This is a NOEXEC applet. Be very careful! */
  18
  19
  20/*
  21        sort [-m][-o output][-bdfinru][-t char][-k keydef]... [file...]
  22        sort -c [-bdfinru][-t char][-k keydef][file]
  23*/
  24
  25/* These are sort types */
  26static const char OPT_STR[] ALIGN1 = "ngMucszbrdfimS:T:o:k:t:";
  27enum {
  28        FLAG_n  = 1,            /* Numeric sort */
  29        FLAG_g  = 2,            /* Sort using strtod() */
  30        FLAG_M  = 4,            /* Sort date */
  31/* ucsz apply to root level only, not keys.  b at root level implies bb */
  32        FLAG_u  = 8,            /* Unique */
  33        FLAG_c  = 0x10,         /* Check: no output, exit(!ordered) */
  34        FLAG_s  = 0x20,         /* Stable sort, no ascii fallback at end */
  35        FLAG_z  = 0x40,         /* Input and output is NUL terminated, not \n */
  36/* These can be applied to search keys, the previous four can't */
  37        FLAG_b  = 0x80,         /* Ignore leading blanks */
  38        FLAG_r  = 0x100,        /* Reverse */
  39        FLAG_d  = 0x200,        /* Ignore !(isalnum()|isspace()) */
  40        FLAG_f  = 0x400,        /* Force uppercase */
  41        FLAG_i  = 0x800,        /* Ignore !isprint() */
  42        FLAG_m  = 0x1000,       /* ignored: merge already sorted files; do not sort */
  43        FLAG_S  = 0x2000,       /* ignored: -S, --buffer-size=SIZE */
  44        FLAG_T  = 0x4000,       /* ignored: -T, --temporary-directory=DIR */
  45        FLAG_o  = 0x8000,
  46        FLAG_k  = 0x10000,
  47        FLAG_t  = 0x20000,
  48        FLAG_bb = 0x80000000,   /* Ignore trailing blanks  */
  49};
  50
  51#if ENABLE_FEATURE_SORT_BIG
  52static char key_separator;
  53
  54static struct sort_key {
  55        struct sort_key *next_key;  /* linked list */
  56        unsigned range[4];          /* start word, start char, end word, end char */
  57        unsigned flags;
  58} *key_list;
  59
  60static char *get_key(char *str, struct sort_key *key, int flags)
  61{
  62        int start = 0, end = 0, len, j;
  63        unsigned i;
  64
  65        /* Special case whole string, so we don't have to make a copy */
  66        if (key->range[0] == 1 && !key->range[1] && !key->range[2] && !key->range[3]
  67         && !(flags & (FLAG_b | FLAG_d | FLAG_f | FLAG_i | FLAG_bb))
  68        ) {
  69                return str;
  70        }
  71
  72        /* Find start of key on first pass, end on second pass */
  73        len = strlen(str);
  74        for (j = 0; j < 2; j++) {
  75                if (!key->range[2*j])
  76                        end = len;
  77                /* Loop through fields */
  78                else {
  79                        end = 0;
  80                        for (i = 1; i < key->range[2*j] + j; i++) {
  81                                if (key_separator) {
  82                                        /* Skip body of key and separator */
  83                                        while (str[end]) {
  84                                                if (str[end++] == key_separator)
  85                                                        break;
  86                                        }
  87                                } else {
  88                                        /* Skip leading blanks */
  89                                        while (isspace(str[end]))
  90                                                end++;
  91                                        /* Skip body of key */
  92                                        while (str[end]) {
  93                                                if (isspace(str[end]))
  94                                                        break;
  95                                                end++;
  96                                        }
  97                                }
  98                        }
  99                }
 100                if (!j) start = end;
 101        }
 102        /* Strip leading whitespace if necessary */
 103//XXX: skip_whitespace()
 104        if (flags & FLAG_b)
 105                while (isspace(str[start])) start++;
 106        /* Strip trailing whitespace if necessary */
 107        if (flags & FLAG_bb)
 108                while (end > start && isspace(str[end-1])) end--;
 109        /* Handle offsets on start and end */
 110        if (key->range[3]) {
 111                end += key->range[3] - 1;
 112                if (end > len) end = len;
 113        }
 114        if (key->range[1]) {
 115                start += key->range[1] - 1;
 116                if (start > len) start = len;
 117        }
 118        /* Make the copy */
 119        if (end < start) end = start;
 120        str = xstrndup(str+start, end-start);
 121        /* Handle -d */
 122        if (flags & FLAG_d) {
 123                for (start = end = 0; str[end]; end++)
 124                        if (isspace(str[end]) || isalnum(str[end]))
 125                                str[start++] = str[end];
 126                str[start] = '\0';
 127        }
 128        /* Handle -i */
 129        if (flags & FLAG_i) {
 130                for (start = end = 0; str[end]; end++)
 131                        if (isprint_asciionly(str[end]))
 132                                str[start++] = str[end];
 133                str[start] = '\0';
 134        }
 135        /* Handle -f */
 136        if (flags & FLAG_f)
 137                for (i = 0; str[i]; i++)
 138                        str[i] = toupper(str[i]);
 139
 140        return str;
 141}
 142
 143static struct sort_key *add_key(void)
 144{
 145        struct sort_key **pkey = &key_list;
 146        while (*pkey)
 147                pkey = &((*pkey)->next_key);
 148        return *pkey = xzalloc(sizeof(struct sort_key));
 149}
 150
 151#define GET_LINE(fp) \
 152        ((option_mask32 & FLAG_z) \
 153        ? bb_get_chunk_from_file(fp, NULL) \
 154        : xmalloc_fgetline(fp))
 155#else
 156#define GET_LINE(fp) xmalloc_fgetline(fp)
 157#endif
 158
 159/* Iterate through keys list and perform comparisons */
 160static int compare_keys(const void *xarg, const void *yarg)
 161{
 162        int flags = option_mask32, retval = 0;
 163        char *x, *y;
 164
 165#if ENABLE_FEATURE_SORT_BIG
 166        struct sort_key *key;
 167
 168        for (key = key_list; !retval && key; key = key->next_key) {
 169                flags = key->flags ? key->flags : option_mask32;
 170                /* Chop out and modify key chunks, handling -dfib */
 171                x = get_key(*(char **)xarg, key, flags);
 172                y = get_key(*(char **)yarg, key, flags);
 173#else
 174        /* This curly bracket serves no purpose but to match the nesting
 175           level of the for () loop we're not using */
 176        {
 177                x = *(char **)xarg;
 178                y = *(char **)yarg;
 179#endif
 180                /* Perform actual comparison */
 181                switch (flags & 7) {
 182                default:
 183                        bb_error_msg_and_die("unknown sort type");
 184                        break;
 185                /* Ascii sort */
 186                case 0:
 187#if ENABLE_LOCALE_SUPPORT
 188                        retval = strcoll(x, y);
 189#else
 190                        retval = strcmp(x, y);
 191#endif
 192                        break;
 193#if ENABLE_FEATURE_SORT_BIG
 194                case FLAG_g: {
 195                        char *xx, *yy;
 196                        double dx = strtod(x, &xx);
 197                        double dy = strtod(y, &yy);
 198                        /* not numbers < NaN < -infinity < numbers < +infinity) */
 199                        if (x == xx)
 200                                retval = (y == yy ? 0 : -1);
 201                        else if (y == yy)
 202                                retval = 1;
 203                        /* Check for isnan */
 204                        else if (dx != dx)
 205                                retval = (dy != dy) ? 0 : -1;
 206                        else if (dy != dy)
 207                                retval = 1;
 208                        /* Check for infinity.  Could underflow, but it avoids libm. */
 209                        else if (1.0 / dx == 0.0) {
 210                                if (dx < 0)
 211                                        retval = (1.0 / dy == 0.0 && dy < 0) ? 0 : -1;
 212                                else
 213                                        retval = (1.0 / dy == 0.0 && dy > 0) ? 0 : 1;
 214                        } else if (1.0 / dy == 0.0)
 215                                retval = (dy < 0) ? 1 : -1;
 216                        else
 217                                retval = (dx > dy) ? 1 : ((dx < dy) ? -1 : 0);
 218                        break;
 219                }
 220                case FLAG_M: {
 221                        struct tm thyme;
 222                        int dx;
 223                        char *xx, *yy;
 224
 225                        xx = strptime(x, "%b", &thyme);
 226                        dx = thyme.tm_mon;
 227                        yy = strptime(y, "%b", &thyme);
 228                        if (!xx)
 229                                retval = (!yy) ? 0 : -1;
 230                        else if (!yy)
 231                                retval = 1;
 232                        else
 233                                retval = (dx == thyme.tm_mon) ? 0 : dx - thyme.tm_mon;
 234                        break;
 235                }
 236                /* Full floating point version of -n */
 237                case FLAG_n: {
 238                        double dx = atof(x);
 239                        double dy = atof(y);
 240                        retval = (dx > dy) ? 1 : ((dx < dy) ? -1 : 0);
 241                        break;
 242                }
 243                } /* switch */
 244                /* Free key copies. */
 245                if (x != *(char **)xarg) free(x);
 246                if (y != *(char **)yarg) free(y);
 247                /* if (retval) break; - done by for () anyway */
 248#else
 249                /* Integer version of -n for tiny systems */
 250                case FLAG_n:
 251                        retval = atoi(x) - atoi(y);
 252                        break;
 253                } /* switch */
 254#endif
 255        } /* for */
 256
 257        /* Perform fallback sort if necessary */
 258        if (!retval && !(option_mask32 & FLAG_s))
 259                retval = strcmp(*(char **)xarg, *(char **)yarg);
 260
 261        if (flags & FLAG_r) return -retval;
 262        return retval;
 263}
 264
 265#if ENABLE_FEATURE_SORT_BIG
 266static unsigned str2u(char **str)
 267{
 268        unsigned long lu;
 269        if (!isdigit((*str)[0]))
 270                bb_error_msg_and_die("bad field specification");
 271        lu = strtoul(*str, str, 10);
 272        if ((sizeof(long) > sizeof(int) && lu > INT_MAX) || !lu)
 273                bb_error_msg_and_die("bad field specification");
 274        return lu;
 275}
 276#endif
 277
 278int sort_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
 279int sort_main(int argc UNUSED_PARAM, char **argv)
 280{
 281        char *line, **lines;
 282        char *str_ignored, *str_o, *str_t;
 283        llist_t *lst_k = NULL;
 284        int i, flag;
 285        int linecount;
 286        unsigned opts;
 287
 288        xfunc_error_retval = 2;
 289
 290        /* Parse command line options */
 291        /* -o and -t can be given at most once */
 292        opt_complementary = "o--o:t--t:" /* -t, -o: at most one of each */
 293                        "k::"; /* -k takes list */
 294        opts = getopt32(argv, OPT_STR, &str_ignored, &str_ignored, &str_o, &lst_k, &str_t);
 295        /* global b strips leading and trailing spaces */
 296        if (opts & FLAG_b)
 297                option_mask32 |= FLAG_bb;
 298#if ENABLE_FEATURE_SORT_BIG
 299        if (opts & FLAG_t) {
 300                if (!str_t[0] || str_t[1])
 301                        bb_error_msg_and_die("bad -t parameter");
 302                key_separator = str_t[0];
 303        }
 304        /* note: below this point we use option_mask32, not opts,
 305         * since that reduces register pressure and makes code smaller */
 306
 307        /* parse sort key */
 308        while (lst_k) {
 309                enum {
 310                        FLAG_allowed_for_k =
 311                                FLAG_n | /* Numeric sort */
 312                                FLAG_g | /* Sort using strtod() */
 313                                FLAG_M | /* Sort date */
 314                                FLAG_b | /* Ignore leading blanks */
 315                                FLAG_r | /* Reverse */
 316                                FLAG_d | /* Ignore !(isalnum()|isspace()) */
 317                                FLAG_f | /* Force uppercase */
 318                                FLAG_i | /* Ignore !isprint() */
 319                        0
 320                };
 321                struct sort_key *key = add_key();
 322                char *str_k = llist_pop(&lst_k);
 323
 324                i = 0; /* i==0 before comma, 1 after (-k3,6) */
 325                while (*str_k) {
 326                        /* Start of range */
 327                        /* Cannot use bb_strtou - suffix can be a letter */
 328                        key->range[2*i] = str2u(&str_k);
 329                        if (*str_k == '.') {
 330                                str_k++;
 331                                key->range[2*i+1] = str2u(&str_k);
 332                        }
 333                        while (*str_k) {
 334                                const char *temp2;
 335
 336                                if (*str_k == ',' && !i++) {
 337                                        str_k++;
 338                                        break;
 339                                } /* no else needed: fall through to syntax error
 340                                        because comma isn't in OPT_STR */
 341                                temp2 = strchr(OPT_STR, *str_k);
 342                                if (!temp2)
 343                                        bb_error_msg_and_die("unknown key option");
 344                                flag = 1 << (temp2 - OPT_STR);
 345                                if (flag & ~FLAG_allowed_for_k)
 346                                        bb_error_msg_and_die("unknown sort type");
 347                                /* b after ',' means strip _trailing_ space */
 348                                if (i && flag == FLAG_b)
 349                                        flag = FLAG_bb;
 350                                key->flags |= flag;
 351                                str_k++;
 352                        }
 353                }
 354        }
 355#endif
 356
 357        /* Open input files and read data */
 358        argv += optind;
 359        if (!*argv)
 360                *--argv = (char*)"-";
 361        linecount = 0;
 362        lines = NULL;
 363        do {
 364                /* coreutils 6.9 compat: abort on first open error,
 365                 * do not continue to next file: */
 366                FILE *fp = xfopen_stdin(*argv);
 367                for (;;) {
 368                        line = GET_LINE(fp);
 369                        if (!line)
 370                                break;
 371                        lines = xrealloc_vector(lines, 6, linecount);
 372                        lines[linecount++] = line;
 373                }
 374                fclose_if_not_stdin(fp);
 375        } while (*++argv);
 376
 377#if ENABLE_FEATURE_SORT_BIG
 378        /* if no key, perform alphabetic sort */
 379        if (!key_list)
 380                add_key()->range[0] = 1;
 381        /* handle -c */
 382        if (option_mask32 & FLAG_c) {
 383                int j = (option_mask32 & FLAG_u) ? -1 : 0;
 384                for (i = 1; i < linecount; i++) {
 385                        if (compare_keys(&lines[i-1], &lines[i]) > j) {
 386                                fprintf(stderr, "Check line %u\n", i);
 387                                return EXIT_FAILURE;
 388                        }
 389                }
 390                return EXIT_SUCCESS;
 391        }
 392#endif
 393        /* Perform the actual sort */
 394        qsort(lines, linecount, sizeof(lines[0]), compare_keys);
 395        /* handle -u */
 396        if (option_mask32 & FLAG_u) {
 397                flag = 0;
 398                /* coreutils 6.3 drop lines for which only key is the same */
 399                /* -- disabling last-resort compare... */
 400                option_mask32 |= FLAG_s;
 401                for (i = 1; i < linecount; i++) {
 402                        if (compare_keys(&lines[flag], &lines[i]) == 0)
 403                                free(lines[i]);
 404                        else
 405                                lines[++flag] = lines[i];
 406                }
 407                if (linecount)
 408                        linecount = flag+1;
 409        }
 410
 411        /* Print it */
 412#if ENABLE_FEATURE_SORT_BIG
 413        /* Open output file _after_ we read all input ones */
 414        if (option_mask32 & FLAG_o)
 415                xmove_fd(xopen(str_o, O_WRONLY|O_CREAT|O_TRUNC), STDOUT_FILENO);
 416#endif
 417        flag = (option_mask32 & FLAG_z) ? '\0' : '\n';
 418        for (i = 0; i < linecount; i++)
 419                printf("%s%c", lines[i], flag);
 420
 421        fflush_stdout_and_exit(EXIT_SUCCESS);
 422}
 423