toybox/toys/posix/sort.c
<<
>>
Prefs
   1/* sort.c - put input lines into order
   2 *
   3 * Copyright 2004, 2008 Rob Landley <rob@landley.net>
   4 *
   5 * See http://opengroup.org/onlinepubs/007904975/utilities/sort.html
   6 *
   7 * Deviations from POSIX: Lots.
   8 * We invented -x
   9
  10USE_SORT(NEWTOY(sort, USE_SORT_FLOAT("g")USE_SORT_BIG("S:T:m" "o:k*t:xbMcszdfi") "run", TOYFLAG_USR|TOYFLAG_BIN))
  11
  12config SORT
  13  bool "sort"
  14  default y
  15  help
  16    usage: sort [-run] [FILE...]
  17
  18    Sort all lines of text from input files (or stdin) to stdout.
  19
  20    -r  reverse
  21    -u  unique lines only
  22    -n  numeric order (instead of alphabetical)
  23
  24config SORT_BIG
  25  bool "SuSv3 options (Support -ktcsbdfiozM)"
  26  default y
  27  depends on SORT
  28  help
  29    usage: sort [-bcdfiMsz] [-k#[,#[x]] [-t X]] [-o FILE]
  30
  31    -b  ignore leading blanks (or trailing blanks in second part of key)
  32    -c  check whether input is sorted
  33    -d  dictionary order (use alphanumeric and whitespace chars only)
  34    -f  force uppercase (case insensitive sort)
  35    -i  ignore nonprinting characters
  36    -M  month sort (jan, feb, etc)
  37    -x  Hexadecimal numerical sort
  38    -s  skip fallback sort (only sort with keys)
  39    -z  zero (null) terminated lines
  40    -k  sort by "key" (see below)
  41    -t  use a key separator other than whitespace
  42    -o  output to FILE instead of stdout
  43
  44    Sorting by key looks at a subset of the words on each line.  -k2
  45    uses the second word to the end of the line, -k2,2 looks at only
  46    the second word, -k2,4 looks from the start of the second to the end
  47    of the fourth word.  Specifying multiple keys uses the later keys as
  48    tie breakers, in order.  A type specifier appended to a sort key
  49    (such as -k2,2n) applies only to sorting that key.
  50
  51config SORT_FLOAT
  52  bool
  53  default y
  54  depends on SORT_BIG && TOYBOX_FLOAT
  55  help
  56    usage: sort [-g]
  57
  58    -g  general numeric sort (double precision with nan and inf)
  59*/
  60
  61#define FOR_sort
  62#include "toys.h"
  63
  64GLOBALS(
  65  char *t;
  66  struct arg_list *k;
  67  char *o, *T, S;
  68
  69  void *key_list;
  70  int linecount;
  71  char **lines;
  72)
  73
  74// The sort types are n, g, and M.
  75// u, c, s, and z apply to top level only, not to keys.
  76// b at top level implies bb.
  77// The remaining options can be applied to search keys.
  78
  79#define FLAG_bb (1<<31)  // Ignore trailing blanks
  80
  81struct sort_key
  82{
  83  struct sort_key *next_key;  // linked list
  84  unsigned range[4];          // start word, start char, end word, end char
  85  int flags;
  86};
  87
  88// Copy of the part of this string corresponding to a key/flags.
  89
  90static char *get_key_data(char *str, struct sort_key *key, int flags)
  91{
  92  int start=0, end, len, i, j;
  93
  94  // Special case whole string, so we don't have to make a copy
  95
  96  if(key->range[0]==1 && !key->range[1] && !key->range[2] && !key->range[3]
  97    && !(flags&(FLAG_b|FLAG_d|FLAG_i|FLAG_bb))) return str;
  98
  99  // Find start of key on first pass, end on second pass
 100
 101  len = strlen(str);
 102  for (j=0; j<2; j++) {
 103    if (!key->range[2*j]) end=len;
 104
 105    // Loop through fields
 106    else {
 107      end=0;
 108      for (i=1; i < key->range[2*j]+j; i++) {
 109
 110        // Skip leading blanks
 111        if (str[end] && !TT.t) while (isspace(str[end])) end++;
 112
 113        // Skip body of key
 114        for (; str[end]; end++) {
 115          if (TT.t) {
 116            if (str[end]==*TT.t) {
 117              end++;
 118              break;
 119            }
 120          } else if (isspace(str[end])) break;
 121        }
 122      }
 123    }
 124    if (!j) start=end;
 125  }
 126
 127  // Key with explicit separator starts after the separator
 128  if (TT.t && str[start]==*TT.t) start++;
 129
 130  // Strip leading and trailing whitespace if necessary
 131  if (flags&FLAG_b) while (isspace(str[start])) start++;
 132  if (flags&FLAG_bb) while (end>start && isspace(str[end-1])) end--;
 133
 134  // Handle offsets on start and end
 135  if (key->range[3]) {
 136    end += key->range[3]-1;
 137    if (end>len) end=len;
 138  }
 139  if (key->range[1]) {
 140    start += key->range[1]-1;
 141    if (start>len) start=len;
 142  }
 143
 144  // Make the copy
 145  if (end<start) end=start;
 146  str = xstrndup(str+start, end-start);
 147
 148  // Handle -d
 149  if (flags&FLAG_d) {
 150    for (start = end = 0; str[end]; end++)
 151      if (isspace(str[end]) || isalnum(str[end])) str[start++] = str[end];
 152    str[start] = 0;
 153  }
 154
 155  // Handle -i
 156  if (flags&FLAG_i) {
 157    for (start = end = 0; str[end]; end++)
 158      if (isprint(str[end])) str[start++] = str[end];
 159    str[start] = 0;
 160  }
 161
 162  return str;
 163}
 164
 165// append a sort_key to key_list.
 166
 167static struct sort_key *add_key(void)
 168{
 169  void **stupid_compiler = &TT.key_list;
 170  struct sort_key **pkey = (struct sort_key **)stupid_compiler;
 171
 172  while (*pkey) pkey = &((*pkey)->next_key);
 173  return *pkey = xzalloc(sizeof(struct sort_key));
 174}
 175
 176// Perform actual comparison
 177static int compare_values(int flags, char *x, char *y)
 178{
 179  int ff = flags & (FLAG_n|FLAG_g|FLAG_M|FLAG_x);
 180
 181  // Ascii sort
 182  if (!ff) return ((flags&FLAG_f) ? strcasecmp : strcmp)(x, y);
 183
 184  if (CFG_SORT_FLOAT && ff == FLAG_g) {
 185    char *xx,*yy;
 186    double dx = strtod(x,&xx), dy = strtod(y,&yy);
 187    int xinf, yinf;
 188
 189    // not numbers < NaN < -infinity < numbers < +infinity
 190
 191    if (x==xx) return y==yy ? 0 : -1;
 192    if (y==yy) return 1;
 193
 194    // Check for isnan
 195    if (dx!=dx) return (dy!=dy) ? 0 : -1;
 196    if (dy!=dy) return 1;
 197
 198    // Check for infinity.  (Could underflow, but avoids needing libm.)
 199    xinf = (1.0/dx == 0.0);
 200    yinf = (1.0/dy == 0.0);
 201    if (xinf) {
 202      if(dx<0) return (yinf && dy<0) ? 0 : -1;
 203      return (yinf && dy>0) ? 0 : 1;
 204    }
 205    if (yinf) return dy<0 ? 1 : -1;
 206
 207    return dx>dy ? 1 : (dx<dy ? -1 : 0);
 208  } else if (CFG_SORT_BIG && ff == FLAG_M) {
 209    struct tm thyme;
 210    int dx;
 211    char *xx,*yy;
 212
 213    xx = strptime(x,"%b",&thyme);
 214    dx = thyme.tm_mon;
 215    yy = strptime(y,"%b",&thyme);
 216    if (!xx) return !yy ? 0 : -1;
 217    else if (!yy) return 1;
 218    else return dx==thyme.tm_mon ? 0 : dx-thyme.tm_mon;
 219
 220  } else if (CFG_SORT_BIG && ff == FLAG_x) {
 221    return strtol(x, NULL, 16)-strtol(y, NULL, 16);
 222  // This has to be ff == FLAG_n
 223  } else {
 224    // Full floating point version of -n
 225    if (CFG_SORT_FLOAT) {
 226      double dx = atof(x), dy = atof(y);
 227
 228      return dx>dy ? 1 : (dx<dy ? -1 : 0);
 229    // Integer version of -n for tiny systems
 230    } else return atoi(x)-atoi(y);
 231  }
 232}
 233
 234// Callback from qsort(): Iterate through key_list and perform comparisons.
 235static int compare_keys(const void *xarg, const void *yarg)
 236{
 237  int flags = toys.optflags, retval = 0;
 238  char *x, *y, *xx = *(char **)xarg, *yy = *(char **)yarg;
 239  struct sort_key *key;
 240
 241  if (CFG_SORT_BIG) {
 242    for (key=(struct sort_key *)TT.key_list; !retval && key;
 243       key = key->next_key)
 244    {
 245      flags = key->flags ? key->flags : toys.optflags;
 246
 247      // Chop out and modify key chunks, handling -dfib
 248
 249      x = get_key_data(xx, key, flags);
 250      y = get_key_data(yy, key, flags);
 251
 252      retval = compare_values(flags, x, y);
 253
 254      // Free the copies get_key_data() made.
 255
 256      if (x != xx) free(x);
 257      if (y != yy) free(y);
 258
 259      if (retval) break;
 260    }
 261  } else retval = compare_values(flags, xx, yy);
 262
 263  // Perform fallback sort if necessary (always case insensitive, no -f,
 264  // the point is to get a stable order even for -f sorts)
 265  if (!retval && !(CFG_SORT_BIG && (toys.optflags&FLAG_s))) {
 266    flags = toys.optflags;
 267    retval = strcmp(xx, yy);
 268  }
 269
 270  return retval * ((flags&FLAG_r) ? -1 : 1);
 271}
 272
 273// Callback from loopfiles to handle input files.
 274static void sort_read(int fd, char *name)
 275{
 276  // Read each line from file, appending to a big array.
 277
 278  for (;;) {
 279    char * line = (CFG_SORT_BIG && (toys.optflags&FLAG_z))
 280             ? get_rawline(fd, NULL, 0) : get_line(fd);
 281
 282    if (!line) break;
 283
 284    // handle -c here so we don't allocate more memory than necessary.
 285    if (CFG_SORT_BIG && (toys.optflags&FLAG_c)) {
 286      int j = (toys.optflags&FLAG_u) ? -1 : 0;
 287
 288      if (TT.lines && compare_keys((void *)&TT.lines, &line)>j)
 289        error_exit("%s: Check line %d\n", name, TT.linecount);
 290      free(TT.lines);
 291      TT.lines = (char **)line;
 292    } else {
 293      if (!(TT.linecount&63))
 294        TT.lines = xrealloc(TT.lines, sizeof(char *)*(TT.linecount+64));
 295      TT.lines[TT.linecount] = line;
 296    }
 297    TT.linecount++;
 298  }
 299}
 300
 301void sort_main(void)
 302{
 303  int idx, fd = 1;
 304
 305  // Open output file if necessary.
 306  if (CFG_SORT_BIG && TT.o)
 307    fd = xcreate(TT.o, O_CREAT|O_TRUNC|O_WRONLY, 0666);
 308
 309  // Parse -k sort keys.
 310  if (CFG_SORT_BIG && TT.k) {
 311    struct arg_list *arg;
 312
 313    for (arg = TT.k; arg; arg = arg->next) {
 314      struct sort_key *key = add_key();
 315      char *temp;
 316      int flag;
 317
 318      idx = 0;
 319      temp = arg->arg;
 320      while (*temp) {
 321        // Start of range
 322        key->range[2*idx] = (unsigned)strtol(temp, &temp, 10);
 323        if (*temp=='.')
 324          key->range[(2*idx)+1] = (unsigned)strtol(temp+1, &temp, 10);
 325
 326        // Handle flags appended to a key type.
 327        for (;*temp;temp++) {
 328          char *temp2, *optlist;
 329
 330          // Note that a second comma becomes an "Unknown key" error.
 331
 332          if (*temp==',' && !idx++) {
 333            temp++;
 334            break;
 335          }
 336
 337          // Which flag is this?
 338
 339          optlist = toys.which->options;
 340          temp2 = strchr(optlist, *temp);
 341          flag = (1<<(optlist-temp2+strlen(optlist)-1));
 342
 343          // Was it a flag that can apply to a key?
 344
 345          if (!temp2 || flag>FLAG_b
 346            || (flag&(FLAG_u|FLAG_c|FLAG_s|FLAG_z)))
 347          {
 348            error_exit("Unknown key option.");
 349          }
 350          // b after , means strip _trailing_ space, not leading.
 351          if (idx && flag==FLAG_b) flag = FLAG_bb;
 352          key->flags |= flag;
 353        }
 354      }
 355    }
 356  }
 357
 358  // global b flag strips both leading and trailing spaces
 359  if (toys.optflags&FLAG_b) toys.optflags |= FLAG_bb;
 360
 361  // If no keys, perform alphabetic sort over the whole line.
 362  if (CFG_SORT_BIG && !TT.key_list) add_key()->range[0] = 1;
 363
 364  // Open input files and read data, populating TT.lines[TT.linecount]
 365  loopfiles(toys.optargs, sort_read);
 366
 367  // The compare (-c) logic was handled in sort_read(),
 368  // so if we got here, we're done.
 369  if (CFG_SORT_BIG && (toys.optflags&FLAG_c)) goto exit_now;
 370
 371  // Perform the actual sort
 372  qsort(TT.lines, TT.linecount, sizeof(char *), compare_keys);
 373
 374  // handle unique (-u)
 375  if (toys.optflags&FLAG_u) {
 376    int jdx;
 377
 378    for (jdx=0, idx=1; idx<TT.linecount; idx++) {
 379      if (!compare_keys(&TT.lines[jdx], &TT.lines[idx]))
 380        free(TT.lines[idx]);
 381      else TT.lines[++jdx] = TT.lines[idx];
 382    }
 383    if (TT.linecount) TT.linecount = jdx+1;
 384  }
 385
 386  // Output result
 387  for (idx = 0; idx<TT.linecount; idx++) {
 388    char *s = TT.lines[idx];
 389    unsigned i = strlen(s);
 390
 391    if (!(toys.optflags&FLAG_z)) s[i] = '\n';
 392    xwrite(fd, s, i+1);
 393    if (CFG_TOYBOX_FREE) free(s);
 394  }
 395
 396exit_now:
 397  if (CFG_TOYBOX_FREE) {
 398    if (fd != 1) close(fd);
 399    free(TT.lines);
 400  }
 401}
 402