toybox/toys/posix/sort.c
<<
>>
Prefs
   1/* sort.c - put input lines into order
   2 *
   3 * Copyright 2004, 2008 Rob Landley <rob@landley.net>
   4 *
   5 * See http://opengroup.org/onlinepubs/007904975/utilities/sort.html
   6 *
   7 * Deviations from POSIX: Lots.
   8 * We invented -x
   9
  10USE_SORT(NEWTOY(sort, USE_SORT_FLOAT("g")"S:T:m" "o:k*t:" "xVbMcszdfirun", TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_ARGFAIL(2)))
  11
  12config SORT
  13  bool "sort"
  14  default y
  15  help
  16    usage: sort [-runbcdfiMsz] [FILE...] [-k#[,#[x]] [-t X]] [-o FILE]
  17
  18    Sort all lines of text from input files (or stdin) to stdout.
  19
  20    -r  Reverse
  21    -u  Unique lines only
  22    -n  Numeric order (instead of alphabetical)
  23    -b  Ignore leading blanks (or trailing blanks in second part of key)
  24    -c  Check whether input is sorted
  25    -d  Dictionary order (use alphanumeric and whitespace chars only)
  26    -f  Force uppercase (case insensitive sort)
  27    -i  Ignore nonprinting characters
  28    -M  Month sort (jan, feb, etc)
  29    -x  Hexadecimal numerical sort
  30    -s  Skip fallback sort (only sort with keys)
  31    -z  Zero (null) terminated lines
  32    -k  Sort by "key" (see below)
  33    -t  Use a key separator other than whitespace
  34    -o  Output to FILE instead of stdout
  35    -V  Version numbers (name-1.234-rc6.5b.tgz)
  36
  37    Sorting by key looks at a subset of the words on each line. -k2 uses the
  38    second word to the end of the line, -k2,2 looks at only the second word,
  39    -k2,4 looks from the start of the second to the end of the fourth word.
  40    -k2.4,5 starts from the fourth character of the second word, to the end
  41    of the fifth word. Specifying multiple keys uses the later keys as tie
  42    breakers, in order. A type specifier appended to a sort key (such as -2,2n)
  43    applies only to sorting that key.
  44
  45config SORT_FLOAT
  46  bool
  47  default y
  48  depends on TOYBOX_FLOAT
  49  help
  50    usage: sort [-g]
  51
  52    -g  General numeric sort (double precision with nan and inf)
  53*/
  54
  55#define FOR_sort
  56#include "toys.h"
  57
  58GLOBALS(
  59  char *t;
  60  struct arg_list *k;
  61  char *o, *T, S;
  62
  63  void *key_list;
  64  unsigned linecount;
  65  char **lines, *name;
  66)
  67
  68// The sort types are n, g, and M.
  69// u, c, s, and z apply to top level only, not to keys.
  70// b at top level implies bb.
  71// The remaining options can be applied to search keys.
  72
  73#define FLAG_bb (1<<31)  // Ignore trailing blanks
  74
  75struct sort_key {
  76  struct sort_key *next_key;  // linked list
  77  unsigned range[4];          // start word, start char, end word, end char
  78  int flags;
  79};
  80
  81// Copy of the part of this string corresponding to a key/flags.
  82
  83static char *get_key_data(char *str, struct sort_key *key, int flags)
  84{
  85  int start = 0, end, len, i, j;
  86
  87  // Special case whole string, so we don't have to make a copy
  88
  89  if(key->range[0]==1 && !key->range[1] && !key->range[2] && !key->range[3]
  90    && !(flags&(FLAG_b|FLAG_d|FLAG_i|FLAG_bb))) return str;
  91
  92  // Find start of key on first pass, end on second pass
  93
  94  len = strlen(str);
  95  for (j=0; j<2; j++) {
  96    if (!key->range[2*j]) end=len;
  97
  98    // Loop through fields
  99    else {
 100      end = 0;
 101      for (i = 1; i < key->range[2*j]+j; i++) {
 102
 103        // Skip leading blanks
 104        if (str[end] && !TT.t) while (isspace(str[end])) end++;
 105
 106        // Skip body of key
 107        for (; str[end]; end++) {
 108          if (TT.t) {
 109            if (str[end]==*TT.t) {
 110              end++;
 111              break;
 112            }
 113          } else if (isspace(str[end])) break;
 114        }
 115      }
 116    }
 117    if (!j) start = end;
 118  }
 119
 120  // Key with explicit separator starts after the separator
 121  if (TT.t && str[start]==*TT.t) start++;
 122
 123  // Strip leading and trailing whitespace if necessary
 124  if ((flags&FLAG_b) || (!TT.t && !key->range[3]))
 125    while (isspace(str[start])) start++;
 126  if (flags&FLAG_bb) while (end>start && isspace(str[end-1])) end--;
 127
 128  // Handle offsets on start and end
 129  if (key->range[3]) {
 130    end += key->range[3]-1;
 131    if (end>len) end=len;
 132  }
 133  if (key->range[1]) {
 134    start += key->range[1]-1;
 135    if (start>len) start=len;
 136  }
 137
 138  // Make the copy
 139  if (end<start) end = start;
 140  str = xstrndup(str+start, end-start);
 141
 142  // Handle -d
 143  if (flags&FLAG_d) {
 144    for (start = end = 0; str[end]; end++)
 145      if (isspace(str[end]) || isalnum(str[end])) str[start++] = str[end];
 146    str[start] = 0;
 147  }
 148
 149  // Handle -i
 150  if (flags&FLAG_i) {
 151    for (start = end = 0; str[end]; end++)
 152      if (isprint(str[end])) str[start++] = str[end];
 153    str[start] = 0;
 154  }
 155
 156  return str;
 157}
 158
 159// append a sort_key to key_list.
 160
 161static struct sort_key *add_key(void)
 162{
 163  void **stupid_compiler = &TT.key_list;
 164  struct sort_key **pkey = (struct sort_key **)stupid_compiler;
 165
 166  while (*pkey) pkey = &((*pkey)->next_key);
 167  return *pkey = xzalloc(sizeof(struct sort_key));
 168}
 169
 170// Perform actual comparison
 171static int compare_values(int flags, char *x, char *y)
 172{
 173  if (CFG_SORT_FLOAT && (flags & FLAG_g)) {
 174    char *xx,*yy;
 175    double dx = strtod(x,&xx), dy = strtod(y,&yy);
 176    int xinf, yinf;
 177
 178    // not numbers < NaN < -infinity < numbers < +infinity
 179
 180    if (x==xx) return y==yy ? 0 : -1;
 181    if (y==yy) return 1;
 182
 183    // Check for isnan
 184    if (dx!=dx) return (dy!=dy) ? 0 : -1;
 185    if (dy!=dy) return 1;
 186
 187    // Check for infinity.  (Could underflow, but avoids needing libm.)
 188    xinf = (1.0/dx == 0.0);
 189    yinf = (1.0/dy == 0.0);
 190    if (xinf) {
 191      if(dx<0) return (yinf && dy<0) ? 0 : -1;
 192      return (yinf && dy>0) ? 0 : 1;
 193    }
 194    if (yinf) return dy<0 ? 1 : -1;
 195
 196    return dx<dy ? -1 : dx>dy;
 197  } else if (flags & FLAG_M) {
 198    struct tm thyme;
 199    int dx;
 200    char *xx,*yy;
 201
 202    xx = strptime(x,"%b",&thyme);
 203    dx = thyme.tm_mon;
 204    yy = strptime(y,"%b",&thyme);
 205    if (!xx) return !yy ? 0 : -1;
 206    else if (!yy) return 1;
 207    else return dx==thyme.tm_mon ? 0 : dx-thyme.tm_mon;
 208
 209  } else if (flags & FLAG_x) return strtol(x, NULL, 16)-strtol(y, NULL, 16);
 210  else if (flags & FLAG_V) {
 211    while (*x && *y) {
 212      while (*x && *x == *y) x++, y++;
 213      if (isdigit(*x) && isdigit(*y)) {
 214        long long xx = strtoll(x, &x, 10), yy = strtoll(y, &y, 10);
 215
 216        if (xx<yy) return -1;
 217        if (xx>yy) return 1;
 218      } else {
 219        char xx = *x ? *x : x[-1], yy = *y ? *y : y[-1];
 220
 221        // -rc/-pre hack so abc-123 > abc-123-rc1 (other way already - < 0-9)
 222        if (xx != yy) {
 223          if (xx<yy && !strstart(&y, "-rc") && !strstart(&y, "-pre")) return -1;
 224          else return 1;
 225        }
 226      }
 227    }
 228    return *x ? !!*y : -1;
 229  // This is actually an integer sort with decimals sorted by string fallback.
 230  } else if (flags & FLAG_n) {
 231    long long dx = atoll(x), dy = atoll(y);
 232
 233    return dx<dy ? -1 : dx>dy;
 234
 235  // Ascii sort
 236  } else return ((flags&FLAG_f) ? strcasecmp : strcmp)(x, y);
 237}
 238
 239// Callback from qsort(): Iterate through key_list and perform comparisons.
 240static int compare_keys(const void *xarg, const void *yarg)
 241{
 242  int flags = toys.optflags, retval = 0;
 243  char *x, *y, *xx = *(char **)xarg, *yy = *(char **)yarg;
 244  struct sort_key *key;
 245
 246  for (key=(void *)TT.key_list; !retval && key; key = key->next_key) {
 247    flags = key->flags ? : toys.optflags;
 248
 249    // Chop out and modify key chunks, handling -dfib
 250
 251    x = get_key_data(xx, key, flags);
 252    y = get_key_data(yy, key, flags);
 253
 254    retval = compare_values(flags, x, y);
 255
 256    // Free the copies get_key_data() made.
 257
 258    if (x != xx) free(x);
 259    if (y != yy) free(y);
 260
 261    if (retval) break;
 262  }
 263
 264  // Perform fallback sort if necessary (always case insensitive, no -f,
 265  // the point is to get a stable order even for -f sorts)
 266  if (!retval && !FLAG(s)) {
 267    flags = toys.optflags;
 268    retval = strcmp(xx, yy);
 269  }
 270
 271  return retval * ((flags&FLAG_r) ? -1 : 1);
 272}
 273
 274// Read each line from file, appending to a big array.
 275static void sort_lines(char **pline, long len)
 276{
 277  char *line;
 278
 279  if (!pline) return;
 280  line = *pline;
 281  if (!FLAG(z) && len && line[len-1]=='\n') line[--len] = 0;
 282  *pline = 0;
 283
 284  // handle -c here so we don't allocate more memory than necessary.
 285  if (FLAG(c)) {
 286    if (TT.lines && compare_keys((void *)&TT.lines, &line)>-!!FLAG(u))
 287      error_exit("%s: Check line %u\n", TT.name, TT.linecount);
 288    free(TT.lines);
 289    TT.lines = (void *)line;
 290  } else {
 291    if (!(TT.linecount&63))
 292      TT.lines = xrealloc(TT.lines, sizeof(char *)*(TT.linecount+64));
 293    TT.lines[TT.linecount] = line;
 294  }
 295  TT.linecount++;
 296}
 297
 298// Callback from loopfiles to handle input files.
 299static void sort_read(int fd, char *name)
 300{
 301  TT.name = name;
 302  do_lines(fd, FLAG(z) ? '\0' : '\n', sort_lines);
 303}
 304
 305void sort_main(void)
 306{
 307  int idx, jdx, fd = 1;
 308
 309  if (FLAG(u)) toys.optflags |= FLAG_s;
 310
 311  // Parse -k sort keys.
 312  if (TT.k) {
 313    struct arg_list *arg;
 314
 315    for (arg = TT.k; arg; arg = arg->next) {
 316      struct sort_key *key = add_key();
 317      char *temp, *temp2, *optlist;
 318      int flag;
 319
 320      idx = 0;
 321      temp = arg->arg;
 322      while (*temp) {
 323        // Start of range
 324        key->range[2*idx] = strtol(temp, &temp, 10);
 325        if (*temp=='.') key->range[(2*idx)+1] = strtol(temp+1, &temp, 10);
 326
 327        // Handle flags appended to a key type.
 328        for (;*temp;temp++) {
 329
 330          // Second comma becomes an "Unknown key" error.
 331          if (*temp==',' && !idx++) {
 332            temp++;
 333            break;
 334          }
 335
 336          // Which flag is this?
 337          optlist = toys.which->options;
 338          temp2 = strchr(optlist, *temp);
 339          flag = 1<<(optlist-temp2+strlen(optlist)-1);
 340
 341          // Was it a flag that can apply to a key?
 342          if (!temp2 || flag>FLAG_x || (flag&(FLAG_u|FLAG_c|FLAG_s|FLAG_z))) {
 343            toys.exitval = 2;
 344            error_exit("Unknown key option.");
 345          }
 346          // b after , means strip _trailing_ space, not leading.
 347          if (idx && flag==FLAG_b) flag = FLAG_bb;
 348          key->flags |= flag;
 349        }
 350      }
 351    }
 352  }
 353
 354  // global b flag strips both leading and trailing spaces
 355  if (FLAG(b)) toys.optflags |= FLAG_bb;
 356
 357  // If no keys, perform alphabetic sort over the whole line.
 358  if (!TT.key_list) add_key()->range[0] = 1;
 359
 360  // Open input files and read data, populating TT.lines[TT.linecount]
 361  loopfiles(toys.optargs, sort_read);
 362
 363  // The compare (-c) logic was handled in sort_read(),
 364  // so if we got here, we're done.
 365  if (FLAG(c)) goto exit_now;
 366
 367  // Perform the actual sort
 368  qsort(TT.lines, TT.linecount, sizeof(char *), compare_keys);
 369
 370  // handle unique (-u)
 371  if (FLAG(u)) {
 372    for (jdx=0, idx=1; idx<TT.linecount; idx++) {
 373      if (!compare_keys(&TT.lines[jdx], &TT.lines[idx])) free(TT.lines[idx]);
 374      else TT.lines[++jdx] = TT.lines[idx];
 375    }
 376    if (TT.linecount) TT.linecount = jdx+1;
 377  }
 378
 379  // Open output file if necessary. We can't do this until we've finished
 380  // reading in case the output file is one of the input files.
 381  if (TT.o) fd = xcreate(TT.o, O_CREAT|O_TRUNC|O_WRONLY, 0666);
 382
 383  // Output result
 384  for (idx = 0; idx<TT.linecount; idx++) {
 385    char *s = TT.lines[idx];
 386    unsigned i = strlen(s);
 387
 388    if (!FLAG(z)) s[i] = '\n';
 389    xwrite(fd, s, i+1);
 390    if (CFG_TOYBOX_FREE) free(s);
 391  }
 392
 393exit_now:
 394  if (CFG_TOYBOX_FREE) {
 395    if (fd != 1) close(fd);
 396    free(TT.lines);
 397  }
 398}
 399