uboot/lib/bzip2/bzlib_blocksort.c
<<
>>
Prefs
   1
   2/*-------------------------------------------------------------*/
   3/*--- Block sorting machinery                               ---*/
   4/*---                                           blocksort.c ---*/
   5/*-------------------------------------------------------------*/
   6
   7/*--
   8  This file is a part of bzip2 and/or libbzip2, a program and
   9  library for lossless, block-sorting data compression.
  10
  11  Copyright (C) 1996-2002 Julian R Seward.  All rights reserved.
  12
  13  Redistribution and use in source and binary forms, with or without
  14  modification, are permitted provided that the following conditions
  15  are met:
  16
  17  1. Redistributions of source code must retain the above copyright
  18     notice, this list of conditions and the following disclaimer.
  19
  20  2. The origin of this software must not be misrepresented; you must
  21     not claim that you wrote the original software.  If you use this
  22     software in a product, an acknowledgment in the product
  23     documentation would be appreciated but is not required.
  24
  25  3. Altered source versions must be plainly marked as such, and must
  26     not be misrepresented as being the original software.
  27
  28  4. The name of the author may not be used to endorse or promote
  29     products derived from this software without specific prior written
  30     permission.
  31
  32  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
  33  OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  34  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  35  ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
  36  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  37  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
  38  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  39  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  40  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  41  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  42  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  43
  44  Julian Seward, Cambridge, UK.
  45  jseward@acm.org
  46  bzip2/libbzip2 version 1.0.6 of 6 September 2010
  47  Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
  48
  49  This program is based on (at least) the work of:
  50     Mike Burrows
  51     David Wheeler
  52     Peter Fenwick
  53     Alistair Moffat
  54     Radford Neal
  55     Ian H. Witten
  56     Robert Sedgewick
  57     Jon L. Bentley
  58
  59  For more information on these sources, see the manual.
  60--*/
  61
  62#include "bzlib_private.h"
  63#include <log.h>
  64
  65/*---------------------------------------------*/
  66/*--- Fallback O(N log(N)^2) sorting        ---*/
  67/*--- algorithm, for repetitive blocks      ---*/
  68/*---------------------------------------------*/
  69
  70/*---------------------------------------------*/
  71static 
  72__inline__
  73void fallbackSimpleSort ( UInt32* fmap, 
  74                          UInt32* eclass, 
  75                          Int32   lo, 
  76                          Int32   hi )
  77{
  78   Int32 i, j, tmp;
  79   UInt32 ec_tmp;
  80
  81   if (lo == hi) return;
  82
  83   if (hi - lo > 3) {
  84      for ( i = hi-4; i >= lo; i-- ) {
  85         tmp = fmap[i];
  86         ec_tmp = eclass[tmp];
  87         for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
  88            fmap[j-4] = fmap[j];
  89         fmap[j-4] = tmp;
  90      }
  91   }
  92
  93   for ( i = hi-1; i >= lo; i-- ) {
  94      tmp = fmap[i];
  95      ec_tmp = eclass[tmp];
  96      for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
  97         fmap[j-1] = fmap[j];
  98      fmap[j-1] = tmp;
  99   }
 100}
 101
 102
 103/*---------------------------------------------*/
 104#define fswap(zz1, zz2) \
 105   { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
 106
 107#define fvswap(zzp1, zzp2, zzn)       \
 108{                                     \
 109   Int32 yyp1 = (zzp1);               \
 110   Int32 yyp2 = (zzp2);               \
 111   Int32 yyn  = (zzn);                \
 112   while (yyn > 0) {                  \
 113      fswap(fmap[yyp1], fmap[yyp2]);  \
 114      yyp1++; yyp2++; yyn--;          \
 115   }                                  \
 116}
 117
 118
 119#define fmin(a,b) ((a) < (b)) ? (a) : (b)
 120
 121#define fpush(lz,hz) { stackLo[sp] = lz; \
 122                       stackHi[sp] = hz; \
 123                       sp++; }
 124
 125#define fpop(lz,hz) { sp--;              \
 126                      lz = stackLo[sp];  \
 127                      hz = stackHi[sp]; }
 128
 129#define FALLBACK_QSORT_SMALL_THRESH 10
 130#define FALLBACK_QSORT_STACK_SIZE   100
 131
 132
 133static
 134void fallbackQSort3 ( UInt32* fmap, 
 135                      UInt32* eclass,
 136                      Int32   loSt, 
 137                      Int32   hiSt )
 138{
 139   Int32 unLo, unHi, ltLo, gtHi, n, m;
 140   Int32 sp, lo, hi;
 141   UInt32 med, r, r3;
 142   Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
 143   Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
 144
 145   r = 0;
 146
 147   sp = 0;
 148   fpush ( loSt, hiSt );
 149
 150   while (sp > 0) {
 151
 152      AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 );
 153
 154      fpop ( lo, hi );
 155      if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
 156         fallbackSimpleSort ( fmap, eclass, lo, hi );
 157         continue;
 158      }
 159
 160      /* Random partitioning.  Median of 3 sometimes fails to
 161         avoid bad cases.  Median of 9 seems to help but 
 162         looks rather expensive.  This too seems to work but
 163         is cheaper.  Guidance for the magic constants 
 164         7621 and 32768 is taken from Sedgewick's algorithms
 165         book, chapter 35.
 166      */
 167      r = ((r * 7621) + 1) % 32768;
 168      r3 = r % 3;
 169      if (r3 == 0) med = eclass[fmap[lo]]; else
 170      if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
 171                   med = eclass[fmap[hi]];
 172
 173      unLo = ltLo = lo;
 174      unHi = gtHi = hi;
 175
 176      while (1) {
 177         while (1) {
 178            if (unLo > unHi) break;
 179            n = (Int32)eclass[fmap[unLo]] - (Int32)med;
 180            if (n == 0) { 
 181               fswap(fmap[unLo], fmap[ltLo]); 
 182               ltLo++; unLo++; 
 183               continue; 
 184            };
 185            if (n > 0) break;
 186            unLo++;
 187         }
 188         while (1) {
 189            if (unLo > unHi) break;
 190            n = (Int32)eclass[fmap[unHi]] - (Int32)med;
 191            if (n == 0) { 
 192               fswap(fmap[unHi], fmap[gtHi]); 
 193               gtHi--; unHi--; 
 194               continue; 
 195            };
 196            if (n < 0) break;
 197            unHi--;
 198         }
 199         if (unLo > unHi) break;
 200         fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
 201      }
 202
 203      AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
 204
 205      if (gtHi < ltLo) continue;
 206
 207      n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
 208      m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
 209
 210      n = lo + unLo - ltLo - 1;
 211      m = hi - (gtHi - unHi) + 1;
 212
 213      if (n - lo > hi - m) {
 214         fpush ( lo, n );
 215         fpush ( m, hi );
 216      } else {
 217         fpush ( m, hi );
 218         fpush ( lo, n );
 219      }
 220   }
 221}
 222
 223#undef fmin
 224#undef fpush
 225#undef fpop
 226#undef fswap
 227#undef fvswap
 228#undef FALLBACK_QSORT_SMALL_THRESH
 229#undef FALLBACK_QSORT_STACK_SIZE
 230
 231
 232/*---------------------------------------------*/
 233/* Pre:
 234      nblock > 0
 235      eclass exists for [0 .. nblock-1]
 236      ((UChar*)eclass) [0 .. nblock-1] holds block
 237      ptr exists for [0 .. nblock-1]
 238
 239   Post:
 240      ((UChar*)eclass) [0 .. nblock-1] holds block
 241      All other areas of eclass destroyed
 242      fmap [0 .. nblock-1] holds sorted order
 243      bhtab [ 0 .. 2+(nblock/32) ] destroyed
 244*/
 245
 246#define       SET_BH(zz)  bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
 247#define     CLEAR_BH(zz)  bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
 248#define     ISSET_BH(zz)  (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
 249#define      WORD_BH(zz)  bhtab[(zz) >> 5]
 250#define UNALIGNED_BH(zz)  ((zz) & 0x01f)
 251
 252static
 253void fallbackSort ( UInt32* fmap, 
 254                    UInt32* eclass, 
 255                    UInt32* bhtab,
 256                    Int32   nblock,
 257                    Int32   verb )
 258{
 259   Int32 ftab[257];
 260   Int32 ftabCopy[256];
 261   Int32 H, i, j, k, l, r, cc, cc1;
 262   Int32 nNotDone;
 263   Int32 nBhtab;
 264   UChar* eclass8 = (UChar*)eclass;
 265
 266   /*--
 267      Initial 1-char radix sort to generate
 268      initial fmap and initial BH bits.
 269   --*/
 270   if (verb >= 4)
 271      VPrintf0 ( "        bucket sorting ...\n" );
 272   for (i = 0; i < 257;    i++) ftab[i] = 0;
 273   for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
 274   for (i = 0; i < 256;    i++) ftabCopy[i] = ftab[i];
 275   for (i = 1; i < 257;    i++) ftab[i] += ftab[i-1];
 276
 277   for (i = 0; i < nblock; i++) {
 278      j = eclass8[i];
 279      k = ftab[j] - 1;
 280      ftab[j] = k;
 281      fmap[k] = i;
 282   }
 283
 284   nBhtab = 2 + (nblock / 32);
 285   for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
 286   for (i = 0; i < 256; i++) SET_BH(ftab[i]);
 287
 288   /*--
 289      Inductively refine the buckets.  Kind-of an
 290      "exponential radix sort" (!), inspired by the
 291      Manber-Myers suffix array construction algorithm.
 292   --*/
 293
 294   /*-- set sentinel bits for block-end detection --*/
 295   for (i = 0; i < 32; i++) { 
 296      SET_BH(nblock + 2*i);
 297      CLEAR_BH(nblock + 2*i + 1);
 298   }
 299
 300   /*-- the log(N) loop --*/
 301   H = 1;
 302   while (1) {
 303
 304      if (verb >= 4) 
 305         VPrintf1 ( "        depth %6d has ", H );
 306
 307      j = 0;
 308      for (i = 0; i < nblock; i++) {
 309         if (ISSET_BH(i)) j = i;
 310         k = fmap[i] - H; if (k < 0) k += nblock;
 311         eclass[k] = j;
 312      }
 313
 314      nNotDone = 0;
 315      r = -1;
 316      while (1) {
 317
 318         /*-- find the next non-singleton bucket --*/
 319         k = r + 1;
 320         while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
 321         if (ISSET_BH(k)) {
 322            while (WORD_BH(k) == 0xffffffff) k += 32;
 323            while (ISSET_BH(k)) k++;
 324         }
 325         l = k - 1;
 326         if (l >= nblock) break;
 327         while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
 328         if (!ISSET_BH(k)) {
 329            while (WORD_BH(k) == 0x00000000) k += 32;
 330            while (!ISSET_BH(k)) k++;
 331         }
 332         r = k - 1;
 333         if (r >= nblock) break;
 334
 335         /*-- now [l, r] bracket current bucket --*/
 336         if (r > l) {
 337            nNotDone += (r - l + 1);
 338            fallbackQSort3 ( fmap, eclass, l, r );
 339
 340            /*-- scan bucket and generate header bits-- */
 341            cc = -1;
 342            for (i = l; i <= r; i++) {
 343               cc1 = eclass[fmap[i]];
 344               if (cc != cc1) { SET_BH(i); cc = cc1; };
 345            }
 346         }
 347      }
 348
 349      if (verb >= 4) 
 350         VPrintf1 ( "%6d unresolved strings\n", nNotDone );
 351
 352      H *= 2;
 353      if (H > nblock || nNotDone == 0) break;
 354   }
 355
 356   /*-- 
 357      Reconstruct the original block in
 358      eclass8 [0 .. nblock-1], since the
 359      previous phase destroyed it.
 360   --*/
 361   if (verb >= 4)
 362      VPrintf0 ( "        reconstructing block ...\n" );
 363   j = 0;
 364   for (i = 0; i < nblock; i++) {
 365      while (ftabCopy[j] == 0) j++;
 366      ftabCopy[j]--;
 367      eclass8[fmap[i]] = (UChar)j;
 368   }
 369   AssertH ( j < 256, 1005 );
 370}
 371
 372#undef       SET_BH
 373#undef     CLEAR_BH
 374#undef     ISSET_BH
 375#undef      WORD_BH
 376#undef UNALIGNED_BH
 377
 378
 379/*---------------------------------------------*/
 380/*--- The main, O(N^2 log(N)) sorting       ---*/
 381/*--- algorithm.  Faster for "normal"       ---*/
 382/*--- non-repetitive blocks.                ---*/
 383/*---------------------------------------------*/
 384
 385/*---------------------------------------------*/
 386static
 387__inline__
 388Bool mainGtU ( UInt32  i1, 
 389               UInt32  i2,
 390               UChar*  block, 
 391               UInt16* quadrant,
 392               UInt32  nblock,
 393               Int32*  budget )
 394{
 395   Int32  k;
 396   UChar  c1, c2;
 397   UInt16 s1, s2;
 398
 399   AssertD ( i1 != i2, "mainGtU" );
 400   /* 1 */
 401   c1 = block[i1]; c2 = block[i2];
 402   if (c1 != c2) return (c1 > c2);
 403   i1++; i2++;
 404   /* 2 */
 405   c1 = block[i1]; c2 = block[i2];
 406   if (c1 != c2) return (c1 > c2);
 407   i1++; i2++;
 408   /* 3 */
 409   c1 = block[i1]; c2 = block[i2];
 410   if (c1 != c2) return (c1 > c2);
 411   i1++; i2++;
 412   /* 4 */
 413   c1 = block[i1]; c2 = block[i2];
 414   if (c1 != c2) return (c1 > c2);
 415   i1++; i2++;
 416   /* 5 */
 417   c1 = block[i1]; c2 = block[i2];
 418   if (c1 != c2) return (c1 > c2);
 419   i1++; i2++;
 420   /* 6 */
 421   c1 = block[i1]; c2 = block[i2];
 422   if (c1 != c2) return (c1 > c2);
 423   i1++; i2++;
 424   /* 7 */
 425   c1 = block[i1]; c2 = block[i2];
 426   if (c1 != c2) return (c1 > c2);
 427   i1++; i2++;
 428   /* 8 */
 429   c1 = block[i1]; c2 = block[i2];
 430   if (c1 != c2) return (c1 > c2);
 431   i1++; i2++;
 432   /* 9 */
 433   c1 = block[i1]; c2 = block[i2];
 434   if (c1 != c2) return (c1 > c2);
 435   i1++; i2++;
 436   /* 10 */
 437   c1 = block[i1]; c2 = block[i2];
 438   if (c1 != c2) return (c1 > c2);
 439   i1++; i2++;
 440   /* 11 */
 441   c1 = block[i1]; c2 = block[i2];
 442   if (c1 != c2) return (c1 > c2);
 443   i1++; i2++;
 444   /* 12 */
 445   c1 = block[i1]; c2 = block[i2];
 446   if (c1 != c2) return (c1 > c2);
 447   i1++; i2++;
 448
 449   k = nblock + 8;
 450
 451   do {
 452      /* 1 */
 453      c1 = block[i1]; c2 = block[i2];
 454      if (c1 != c2) return (c1 > c2);
 455      s1 = quadrant[i1]; s2 = quadrant[i2];
 456      if (s1 != s2) return (s1 > s2);
 457      i1++; i2++;
 458      /* 2 */
 459      c1 = block[i1]; c2 = block[i2];
 460      if (c1 != c2) return (c1 > c2);
 461      s1 = quadrant[i1]; s2 = quadrant[i2];
 462      if (s1 != s2) return (s1 > s2);
 463      i1++; i2++;
 464      /* 3 */
 465      c1 = block[i1]; c2 = block[i2];
 466      if (c1 != c2) return (c1 > c2);
 467      s1 = quadrant[i1]; s2 = quadrant[i2];
 468      if (s1 != s2) return (s1 > s2);
 469      i1++; i2++;
 470      /* 4 */
 471      c1 = block[i1]; c2 = block[i2];
 472      if (c1 != c2) return (c1 > c2);
 473      s1 = quadrant[i1]; s2 = quadrant[i2];
 474      if (s1 != s2) return (s1 > s2);
 475      i1++; i2++;
 476      /* 5 */
 477      c1 = block[i1]; c2 = block[i2];
 478      if (c1 != c2) return (c1 > c2);
 479      s1 = quadrant[i1]; s2 = quadrant[i2];
 480      if (s1 != s2) return (s1 > s2);
 481      i1++; i2++;
 482      /* 6 */
 483      c1 = block[i1]; c2 = block[i2];
 484      if (c1 != c2) return (c1 > c2);
 485      s1 = quadrant[i1]; s2 = quadrant[i2];
 486      if (s1 != s2) return (s1 > s2);
 487      i1++; i2++;
 488      /* 7 */
 489      c1 = block[i1]; c2 = block[i2];
 490      if (c1 != c2) return (c1 > c2);
 491      s1 = quadrant[i1]; s2 = quadrant[i2];
 492      if (s1 != s2) return (s1 > s2);
 493      i1++; i2++;
 494      /* 8 */
 495      c1 = block[i1]; c2 = block[i2];
 496      if (c1 != c2) return (c1 > c2);
 497      s1 = quadrant[i1]; s2 = quadrant[i2];
 498      if (s1 != s2) return (s1 > s2);
 499      i1++; i2++;
 500
 501      if (i1 >= nblock) i1 -= nblock;
 502      if (i2 >= nblock) i2 -= nblock;
 503
 504      k -= 8;
 505      (*budget)--;
 506   }
 507      while (k >= 0);
 508
 509   return False;
 510}
 511
 512
 513/*---------------------------------------------*/
 514/*--
 515   Knuth's increments seem to work better
 516   than Incerpi-Sedgewick here.  Possibly
 517   because the number of elems to sort is
 518   usually small, typically <= 20.
 519--*/
 520static
 521Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
 522                   9841, 29524, 88573, 265720,
 523                   797161, 2391484 };
 524
 525static
 526void mainSimpleSort ( UInt32* ptr,
 527                      UChar*  block,
 528                      UInt16* quadrant,
 529                      Int32   nblock,
 530                      Int32   lo, 
 531                      Int32   hi, 
 532                      Int32   d,
 533                      Int32*  budget )
 534{
 535   Int32 i, j, h, bigN, hp;
 536   UInt32 v;
 537
 538   bigN = hi - lo + 1;
 539   if (bigN < 2) return;
 540
 541   hp = 0;
 542   while (incs[hp] < bigN) hp++;
 543   hp--;
 544
 545   for (; hp >= 0; hp--) {
 546      h = incs[hp];
 547
 548      i = lo + h;
 549      while (True) {
 550
 551         /*-- copy 1 --*/
 552         if (i > hi) break;
 553         v = ptr[i];
 554         j = i;
 555         while ( mainGtU ( 
 556                    ptr[j-h]+d, v+d, block, quadrant, nblock, budget 
 557                 ) ) {
 558            ptr[j] = ptr[j-h];
 559            j = j - h;
 560            if (j <= (lo + h - 1)) break;
 561         }
 562         ptr[j] = v;
 563         i++;
 564
 565         /*-- copy 2 --*/
 566         if (i > hi) break;
 567         v = ptr[i];
 568         j = i;
 569         while ( mainGtU ( 
 570                    ptr[j-h]+d, v+d, block, quadrant, nblock, budget 
 571                 ) ) {
 572            ptr[j] = ptr[j-h];
 573            j = j - h;
 574            if (j <= (lo + h - 1)) break;
 575         }
 576         ptr[j] = v;
 577         i++;
 578
 579         /*-- copy 3 --*/
 580         if (i > hi) break;
 581         v = ptr[i];
 582         j = i;
 583         while ( mainGtU ( 
 584                    ptr[j-h]+d, v+d, block, quadrant, nblock, budget 
 585                 ) ) {
 586            ptr[j] = ptr[j-h];
 587            j = j - h;
 588            if (j <= (lo + h - 1)) break;
 589         }
 590         ptr[j] = v;
 591         i++;
 592
 593         if (*budget < 0) return;
 594      }
 595   }
 596}
 597
 598
 599/*---------------------------------------------*/
 600/*--
 601   The following is an implementation of
 602   an elegant 3-way quicksort for strings,
 603   described in a paper "Fast Algorithms for
 604   Sorting and Searching Strings", by Robert
 605   Sedgewick and Jon L. Bentley.
 606--*/
 607
 608#define mswap(zz1, zz2) \
 609   { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
 610
 611#define mvswap(zzp1, zzp2, zzn)       \
 612{                                     \
 613   Int32 yyp1 = (zzp1);               \
 614   Int32 yyp2 = (zzp2);               \
 615   Int32 yyn  = (zzn);                \
 616   while (yyn > 0) {                  \
 617      mswap(ptr[yyp1], ptr[yyp2]);    \
 618      yyp1++; yyp2++; yyn--;          \
 619   }                                  \
 620}
 621
 622static 
 623__inline__
 624UChar mmed3 ( UChar a, UChar b, UChar c )
 625{
 626   UChar t;
 627   if (a > b) { t = a; a = b; b = t; };
 628   if (b > c) { 
 629      b = c;
 630      if (a > b) b = a;
 631   }
 632   return b;
 633}
 634
 635#define mmin(a,b) ((a) < (b)) ? (a) : (b)
 636
 637#define mpush(lz,hz,dz) { stackLo[sp] = lz; \
 638                          stackHi[sp] = hz; \
 639                          stackD [sp] = dz; \
 640                          sp++; }
 641
 642#define mpop(lz,hz,dz) { sp--;             \
 643                         lz = stackLo[sp]; \
 644                         hz = stackHi[sp]; \
 645                         dz = stackD [sp]; }
 646
 647
 648#define mnextsize(az) (nextHi[az]-nextLo[az])
 649
 650#define mnextswap(az,bz)                                        \
 651   { Int32 tz;                                                  \
 652     tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
 653     tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
 654     tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
 655
 656
 657#define MAIN_QSORT_SMALL_THRESH 20
 658#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
 659#define MAIN_QSORT_STACK_SIZE 100
 660
 661static
 662void mainQSort3 ( UInt32* ptr,
 663                  UChar*  block,
 664                  UInt16* quadrant,
 665                  Int32   nblock,
 666                  Int32   loSt, 
 667                  Int32   hiSt, 
 668                  Int32   dSt,
 669                  Int32*  budget )
 670{
 671   Int32 unLo, unHi, ltLo, gtHi, n, m, med;
 672   Int32 sp, lo, hi, d;
 673
 674   Int32 stackLo[MAIN_QSORT_STACK_SIZE];
 675   Int32 stackHi[MAIN_QSORT_STACK_SIZE];
 676   Int32 stackD [MAIN_QSORT_STACK_SIZE];
 677
 678   Int32 nextLo[3];
 679   Int32 nextHi[3];
 680   Int32 nextD [3];
 681
 682   sp = 0;
 683   mpush ( loSt, hiSt, dSt );
 684
 685   while (sp > 0) {
 686
 687      AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 );
 688
 689      mpop ( lo, hi, d );
 690      if (hi - lo < MAIN_QSORT_SMALL_THRESH || 
 691          d > MAIN_QSORT_DEPTH_THRESH) {
 692         mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
 693         if (*budget < 0) return;
 694         continue;
 695      }
 696
 697      med = (Int32) 
 698            mmed3 ( block[ptr[ lo         ]+d],
 699                    block[ptr[ hi         ]+d],
 700                    block[ptr[ (lo+hi)>>1 ]+d] );
 701
 702      unLo = ltLo = lo;
 703      unHi = gtHi = hi;
 704
 705      while (True) {
 706         while (True) {
 707            if (unLo > unHi) break;
 708            n = ((Int32)block[ptr[unLo]+d]) - med;
 709            if (n == 0) { 
 710               mswap(ptr[unLo], ptr[ltLo]); 
 711               ltLo++; unLo++; continue; 
 712            };
 713            if (n >  0) break;
 714            unLo++;
 715         }
 716         while (True) {
 717            if (unLo > unHi) break;
 718            n = ((Int32)block[ptr[unHi]+d]) - med;
 719            if (n == 0) { 
 720               mswap(ptr[unHi], ptr[gtHi]); 
 721               gtHi--; unHi--; continue; 
 722            };
 723            if (n <  0) break;
 724            unHi--;
 725         }
 726         if (unLo > unHi) break;
 727         mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
 728      }
 729
 730      AssertD ( unHi == unLo-1, "mainQSort3(2)" );
 731
 732      if (gtHi < ltLo) {
 733         mpush(lo, hi, d+1 );
 734         continue;
 735      }
 736
 737      n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
 738      m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
 739
 740      n = lo + unLo - ltLo - 1;
 741      m = hi - (gtHi - unHi) + 1;
 742
 743      nextLo[0] = lo;  nextHi[0] = n;   nextD[0] = d;
 744      nextLo[1] = m;   nextHi[1] = hi;  nextD[1] = d;
 745      nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
 746
 747      if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
 748      if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
 749      if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
 750
 751      AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
 752      AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
 753
 754      mpush (nextLo[0], nextHi[0], nextD[0]);
 755      mpush (nextLo[1], nextHi[1], nextD[1]);
 756      mpush (nextLo[2], nextHi[2], nextD[2]);
 757   }
 758}
 759
 760#undef mswap
 761#undef mvswap
 762#undef mpush
 763#undef mpop
 764#undef mmin
 765#undef mnextsize
 766#undef mnextswap
 767#undef MAIN_QSORT_SMALL_THRESH
 768#undef MAIN_QSORT_DEPTH_THRESH
 769#undef MAIN_QSORT_STACK_SIZE
 770
 771
 772/*---------------------------------------------*/
 773/* Pre:
 774      nblock > N_OVERSHOOT
 775      block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
 776      ((UChar*)block32) [0 .. nblock-1] holds block
 777      ptr exists for [0 .. nblock-1]
 778
 779   Post:
 780      ((UChar*)block32) [0 .. nblock-1] holds block
 781      All other areas of block32 destroyed
 782      ftab [0 .. 65536 ] destroyed
 783      ptr [0 .. nblock-1] holds sorted order
 784      if (*budget < 0), sorting was abandoned
 785*/
 786
 787#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
 788#define SETMASK (1 << 21)
 789#define CLEARMASK (~(SETMASK))
 790
 791static
 792void mainSort ( UInt32* ptr, 
 793                UChar*  block,
 794                UInt16* quadrant, 
 795                UInt32* ftab,
 796                Int32   nblock,
 797                Int32   verb,
 798                Int32*  budget )
 799{
 800   Int32  i, j, k, ss, sb;
 801   Int32  runningOrder[256];
 802   Bool   bigDone[256];
 803   Int32  copyStart[256];
 804   Int32  copyEnd  [256];
 805   UChar  c1;
 806   Int32  numQSorted;
 807   UInt16 s;
 808   if (verb >= 4) VPrintf0 ( "        main sort initialise ...\n" );
 809
 810   /*-- set up the 2-byte frequency table --*/
 811   for (i = 65536; i >= 0; i--) ftab[i] = 0;
 812
 813   j = block[0] << 8;
 814   i = nblock-1;
 815   for (; i >= 3; i -= 4) {
 816      quadrant[i] = 0;
 817      j = (j >> 8) | ( ((UInt16)block[i]) << 8);
 818      ftab[j]++;
 819      quadrant[i-1] = 0;
 820      j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
 821      ftab[j]++;
 822      quadrant[i-2] = 0;
 823      j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
 824      ftab[j]++;
 825      quadrant[i-3] = 0;
 826      j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
 827      ftab[j]++;
 828   }
 829   for (; i >= 0; i--) {
 830      quadrant[i] = 0;
 831      j = (j >> 8) | ( ((UInt16)block[i]) << 8);
 832      ftab[j]++;
 833   }
 834
 835   /*-- (emphasises close relationship of block & quadrant) --*/
 836   for (i = 0; i < BZ_N_OVERSHOOT; i++) {
 837      block   [nblock+i] = block[i];
 838      quadrant[nblock+i] = 0;
 839   }
 840
 841   if (verb >= 4) VPrintf0 ( "        bucket sorting ...\n" );
 842
 843   /*-- Complete the initial radix sort --*/
 844   for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
 845
 846   s = block[0] << 8;
 847   i = nblock-1;
 848   for (; i >= 3; i -= 4) {
 849      s = (s >> 8) | (block[i] << 8);
 850      j = ftab[s] -1;
 851      ftab[s] = j;
 852      ptr[j] = i;
 853      s = (s >> 8) | (block[i-1] << 8);
 854      j = ftab[s] -1;
 855      ftab[s] = j;
 856      ptr[j] = i-1;
 857      s = (s >> 8) | (block[i-2] << 8);
 858      j = ftab[s] -1;
 859      ftab[s] = j;
 860      ptr[j] = i-2;
 861      s = (s >> 8) | (block[i-3] << 8);
 862      j = ftab[s] -1;
 863      ftab[s] = j;
 864      ptr[j] = i-3;
 865   }
 866   for (; i >= 0; i--) {
 867      s = (s >> 8) | (block[i] << 8);
 868      j = ftab[s] -1;
 869      ftab[s] = j;
 870      ptr[j] = i;
 871   }
 872
 873   /*--
 874      Now ftab contains the first loc of every small bucket.
 875      Calculate the running order, from smallest to largest
 876      big bucket.
 877   --*/
 878   for (i = 0; i <= 255; i++) {
 879      bigDone     [i] = False;
 880      runningOrder[i] = i;
 881   }
 882
 883   {
 884      Int32 vv;
 885      Int32 h = 1;
 886      do h = 3 * h + 1; while (h <= 256);
 887      do {
 888         h = h / 3;
 889         for (i = h; i <= 255; i++) {
 890            vv = runningOrder[i];
 891            j = i;
 892            while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
 893               runningOrder[j] = runningOrder[j-h];
 894               j = j - h;
 895               if (j <= (h - 1)) goto zero;
 896            }
 897            zero:
 898            runningOrder[j] = vv;
 899         }
 900      } while (h != 1);
 901   }
 902
 903   /*--
 904      The main sorting loop.
 905   --*/
 906
 907   numQSorted = 0;
 908
 909   for (i = 0; i <= 255; i++) {
 910
 911      /*--
 912         Process big buckets, starting with the least full.
 913         Basically this is a 3-step process in which we call
 914         mainQSort3 to sort the small buckets [ss, j], but
 915         also make a big effort to avoid the calls if we can.
 916      --*/
 917      ss = runningOrder[i];
 918
 919      /*--
 920         Step 1:
 921         Complete the big bucket [ss] by quicksorting
 922         any unsorted small buckets [ss, j], for j != ss.  
 923         Hopefully previous pointer-scanning phases have already
 924         completed many of the small buckets [ss, j], so
 925         we don't have to sort them at all.
 926      --*/
 927      for (j = 0; j <= 255; j++) {
 928         if (j != ss) {
 929            sb = (ss << 8) + j;
 930            if ( ! (ftab[sb] & SETMASK) ) {
 931               Int32 lo = ftab[sb]   & CLEARMASK;
 932               Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
 933               if (hi > lo) {
 934                  if (verb >= 4)
 935                     VPrintf4 ( "        qsort [0x%x, 0x%x]   "
 936                                "done %d   this %d\n",
 937                                ss, j, numQSorted, hi - lo + 1 );
 938                  mainQSort3 ( 
 939                     ptr, block, quadrant, nblock, 
 940                     lo, hi, BZ_N_RADIX, budget 
 941                  );   
 942                  numQSorted += (hi - lo + 1);
 943                  if (*budget < 0) return;
 944               }
 945            }
 946            ftab[sb] |= SETMASK;
 947         }
 948      }
 949
 950      AssertH ( !bigDone[ss], 1006 );
 951
 952      /*--
 953         Step 2:
 954         Now scan this big bucket [ss] so as to synthesise the
 955         sorted order for small buckets [t, ss] for all t,
 956         including, magically, the bucket [ss,ss] too.
 957         This will avoid doing Real Work in subsequent Step 1's.
 958      --*/
 959      {
 960         for (j = 0; j <= 255; j++) {
 961            copyStart[j] =  ftab[(j << 8) + ss]     & CLEARMASK;
 962            copyEnd  [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
 963         }
 964         for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
 965            k = ptr[j]-1; if (k < 0) k += nblock;
 966            c1 = block[k];
 967            if (!bigDone[c1])
 968               ptr[ copyStart[c1]++ ] = k;
 969         }
 970         for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
 971            k = ptr[j]-1; if (k < 0) k += nblock;
 972            c1 = block[k];
 973            if (!bigDone[c1]) 
 974               ptr[ copyEnd[c1]-- ] = k;
 975         }
 976      }
 977
 978      AssertH ( (copyStart[ss]-1 == copyEnd[ss])
 979                || 
 980                /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
 981                   Necessity for this case is demonstrated by compressing 
 982                   a sequence of approximately 48.5 million of character 
 983                   251; 1.0.0/1.0.1 will then die here. */
 984                (copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
 985                1007 )
 986
 987      for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
 988
 989      /*--
 990         Step 3:
 991         The [ss] big bucket is now done.  Record this fact,
 992         and update the quadrant descriptors.  Remember to
 993         update quadrants in the overshoot area too, if
 994         necessary.  The "if (i < 255)" test merely skips
 995         this updating for the last bucket processed, since
 996         updating for the last bucket is pointless.
 997
 998         The quadrant array provides a way to incrementally
 999         cache sort orderings, as they appear, so as to 
1000         make subsequent comparisons in fullGtU() complete
1001         faster.  For repetitive blocks this makes a big
1002         difference (but not big enough to be able to avoid
1003         the fallback sorting mechanism, exponential radix sort).
1004
1005         The precise meaning is: at all times:
1006
1007            for 0 <= i < nblock and 0 <= j <= nblock
1008
1009            if block[i] != block[j], 
1010
1011               then the relative values of quadrant[i] and 
1012                    quadrant[j] are meaningless.
1013
1014               else {
1015                  if quadrant[i] < quadrant[j]
1016                     then the string starting at i lexicographically
1017                     precedes the string starting at j
1018
1019                  else if quadrant[i] > quadrant[j]
1020                     then the string starting at j lexicographically
1021                     precedes the string starting at i
1022
1023                  else
1024                     the relative ordering of the strings starting
1025                     at i and j has not yet been determined.
1026               }
1027      --*/
1028      bigDone[ss] = True;
1029
1030      if (i < 255) {
1031         Int32 bbStart  = ftab[ss << 8] & CLEARMASK;
1032         Int32 bbSize   = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
1033         Int32 shifts   = 0;
1034
1035         while ((bbSize >> shifts) > 65534) shifts++;
1036
1037         for (j = bbSize-1; j >= 0; j--) {
1038            Int32 a2update     = ptr[bbStart + j];
1039            UInt16 qVal        = (UInt16)(j >> shifts);
1040            quadrant[a2update] = qVal;
1041            if (a2update < BZ_N_OVERSHOOT)
1042               quadrant[a2update + nblock] = qVal;
1043         }
1044         AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
1045      }
1046
1047   }
1048
1049   if (verb >= 4)
1050      VPrintf3 ( "        %d pointers, %d sorted, %d scanned\n",
1051                 nblock, numQSorted, nblock - numQSorted );
1052}
1053
1054#undef BIGFREQ
1055#undef SETMASK
1056#undef CLEARMASK
1057
1058
1059/*---------------------------------------------*/
1060/* Pre:
1061      nblock > 0
1062      arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
1063      ((UChar*)arr2)  [0 .. nblock-1] holds block
1064      arr1 exists for [0 .. nblock-1]
1065
1066   Post:
1067      ((UChar*)arr2) [0 .. nblock-1] holds block
1068      All other areas of block destroyed
1069      ftab [ 0 .. 65536 ] destroyed
1070      arr1 [0 .. nblock-1] holds sorted order
1071*/
1072void BZ2_blockSort ( EState* s )
1073{
1074   UInt32* ptr    = s->ptr; 
1075   UChar*  block  = s->block;
1076   UInt32* ftab   = s->ftab;
1077   Int32   nblock = s->nblock;
1078   Int32   verb   = s->verbosity;
1079   Int32   wfact  = s->workFactor;
1080   UInt16* quadrant;
1081   Int32   budget;
1082   Int32   budgetInit;
1083   Int32   i;
1084
1085   if (nblock < 10000) {
1086      fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1087   } else {
1088      /* Calculate the location for quadrant, remembering to get
1089         the alignment right.  Assumes that &(block[0]) is at least
1090         2-byte aligned -- this should be ok since block is really
1091         the first section of arr2.
1092      */
1093      i = nblock+BZ_N_OVERSHOOT;
1094      if (i & 1) i++;
1095      quadrant = (UInt16*)(&(block[i]));
1096
1097      /* (wfact-1) / 3 puts the default-factor-30
1098         transition point at very roughly the same place as 
1099         with v0.1 and v0.9.0.  
1100         Not that it particularly matters any more, since the
1101         resulting compressed stream is now the same regardless
1102         of whether or not we use the main sort or fallback sort.
1103      */
1104      if (wfact < 1  ) wfact = 1;
1105      if (wfact > 100) wfact = 100;
1106      budgetInit = nblock * ((wfact-1) / 3);
1107      budget = budgetInit;
1108
1109      mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
1110      if (verb >= 3) 
1111         VPrintf3 ( "      %d work, %d block, ratio %5.2f\n",
1112                    budgetInit - budget,
1113                    nblock, 
1114                    (float)(budgetInit - budget) /
1115                    (float)(nblock==0 ? 1 : nblock) ); 
1116      if (budget < 0) {
1117         if (verb >= 2) 
1118            VPrintf0 ( "    too repetitive; using fallback"
1119                       " sorting algorithm\n" );
1120         fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1121      }
1122   }
1123
1124   s->origPtr = -1;
1125   for (i = 0; i < s->nblock; i++)
1126      if (ptr[i] == 0)
1127         { s->origPtr = i; break; };
1128
1129   AssertH( s->origPtr != -1, 1003 );
1130}
1131
1132
1133/*-------------------------------------------------------------*/
1134/*--- end                                       blocksort.c ---*/
1135/*-------------------------------------------------------------*/
1136