1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28#define mswap(zz1, zz2) \
29{ \
30 int32_t zztmp = zz1; \
31 zz1 = zz2; \
32 zz2 = zztmp; \
33}
34
35static
36
37
38void mvswap(uint32_t* ptr, int32_t zzp1, int32_t zzp2, int32_t zzn)
39{
40 while (zzn > 0) {
41 mswap(ptr[zzp1], ptr[zzp2]);
42 zzp1++;
43 zzp2++;
44 zzn--;
45 }
46}
47
48static
49ALWAYS_INLINE
50int32_t mmin(int32_t a, int32_t b)
51{
52 return (a < b) ? a : b;
53}
54
55
56
57
58
59
60
61
62static
63inline
64void fallbackSimpleSort(uint32_t* fmap,
65 uint32_t* eclass,
66 int32_t lo,
67 int32_t hi)
68{
69 int32_t i, j, tmp;
70 uint32_t ec_tmp;
71
72 if (lo == hi) return;
73
74 if (hi - lo > 3) {
75 for (i = hi-4; i >= lo; i--) {
76 tmp = fmap[i];
77 ec_tmp = eclass[tmp];
78 for (j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4)
79 fmap[j-4] = fmap[j];
80 fmap[j-4] = tmp;
81 }
82 }
83
84 for (i = hi-1; i >= lo; i--) {
85 tmp = fmap[i];
86 ec_tmp = eclass[tmp];
87 for (j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++)
88 fmap[j-1] = fmap[j];
89 fmap[j-1] = tmp;
90 }
91}
92
93
94
95#define fpush(lz,hz) { \
96 stackLo[sp] = lz; \
97 stackHi[sp] = hz; \
98 sp++; \
99}
100
101#define fpop(lz,hz) { \
102 sp--; \
103 lz = stackLo[sp]; \
104 hz = stackHi[sp]; \
105}
106
107#define FALLBACK_QSORT_SMALL_THRESH 10
108#define FALLBACK_QSORT_STACK_SIZE 100
109
110static
111void fallbackQSort3(uint32_t* fmap,
112 uint32_t* eclass,
113 int32_t loSt,
114 int32_t hiSt)
115{
116 int32_t unLo, unHi, ltLo, gtHi, n, m;
117 int32_t sp, lo, hi;
118 uint32_t med, r, r3;
119 int32_t stackLo[FALLBACK_QSORT_STACK_SIZE];
120 int32_t stackHi[FALLBACK_QSORT_STACK_SIZE];
121
122 r = 0;
123
124 sp = 0;
125 fpush(loSt, hiSt);
126
127 while (sp > 0) {
128 AssertH(sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004);
129
130 fpop(lo, hi);
131 if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
132 fallbackSimpleSort(fmap, eclass, lo, hi);
133 continue;
134 }
135
136
137
138
139
140
141
142
143 r = ((r * 7621) + 1) % 32768;
144 r3 = r % 3;
145 if (r3 == 0)
146 med = eclass[fmap[lo]];
147 else if (r3 == 1)
148 med = eclass[fmap[(lo+hi)>>1]];
149 else
150 med = eclass[fmap[hi]];
151
152 unLo = ltLo = lo;
153 unHi = gtHi = hi;
154
155 while (1) {
156 while (1) {
157 if (unLo > unHi) break;
158 n = (int32_t)eclass[fmap[unLo]] - (int32_t)med;
159 if (n == 0) {
160 mswap(fmap[unLo], fmap[ltLo]);
161 ltLo++;
162 unLo++;
163 continue;
164 };
165 if (n > 0) break;
166 unLo++;
167 }
168 while (1) {
169 if (unLo > unHi) break;
170 n = (int32_t)eclass[fmap[unHi]] - (int32_t)med;
171 if (n == 0) {
172 mswap(fmap[unHi], fmap[gtHi]);
173 gtHi--; unHi--;
174 continue;
175 };
176 if (n < 0) break;
177 unHi--;
178 }
179 if (unLo > unHi) break;
180 mswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
181 }
182
183 AssertD(unHi == unLo-1, "fallbackQSort3(2)");
184
185 if (gtHi < ltLo) continue;
186
187 n = mmin(ltLo-lo, unLo-ltLo); mvswap(fmap, lo, unLo-n, n);
188 m = mmin(hi-gtHi, gtHi-unHi); mvswap(fmap, unLo, hi-m+1, m);
189
190 n = lo + unLo - ltLo - 1;
191 m = hi - (gtHi - unHi) + 1;
192
193 if (n - lo > hi - m) {
194 fpush(lo, n);
195 fpush(m, hi);
196 } else {
197 fpush(m, hi);
198 fpush(lo, n);
199 }
200 }
201}
202
203#undef fpush
204#undef fpop
205#undef FALLBACK_QSORT_SMALL_THRESH
206#undef FALLBACK_QSORT_STACK_SIZE
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223#define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
224#define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
225#define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
226#define WORD_BH(zz) bhtab[(zz) >> 5]
227#define UNALIGNED_BH(zz) ((zz) & 0x01f)
228
229static
230void fallbackSort(uint32_t* fmap,
231 uint32_t* eclass,
232 uint32_t* bhtab,
233 int32_t nblock)
234{
235 int32_t ftab[257];
236 int32_t ftabCopy[256];
237 int32_t H, i, j, k, l, r, cc, cc1;
238 int32_t nNotDone;
239 int32_t nBhtab;
240 uint8_t* eclass8 = (uint8_t*)eclass;
241
242
243
244
245
246 for (i = 0; i < 257; i++) ftab[i] = 0;
247 for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
248 for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
249
250 j = ftab[0];
251 for (i = 1; i < 257; i++) {
252 j += ftab[i];
253 ftab[i] = j;
254 }
255
256 for (i = 0; i < nblock; i++) {
257 j = eclass8[i];
258 k = ftab[j] - 1;
259 ftab[j] = k;
260 fmap[k] = i;
261 }
262
263 nBhtab = 2 + ((uint32_t)nblock / 32);
264 for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
265 for (i = 0; i < 256; i++) SET_BH(ftab[i]);
266
267
268
269
270
271
272
273
274 for (i = 0; i < 32; i++) {
275 SET_BH(nblock + 2*i);
276 CLEAR_BH(nblock + 2*i + 1);
277 }
278
279
280 H = 1;
281 while (1) {
282 j = 0;
283 for (i = 0; i < nblock; i++) {
284 if (ISSET_BH(i))
285 j = i;
286 k = fmap[i] - H;
287 if (k < 0)
288 k += nblock;
289 eclass[k] = j;
290 }
291
292 nNotDone = 0;
293 r = -1;
294 while (1) {
295
296
297 k = r + 1;
298 while (ISSET_BH(k) && UNALIGNED_BH(k))
299 k++;
300 if (ISSET_BH(k)) {
301 while (WORD_BH(k) == 0xffffffff) k += 32;
302 while (ISSET_BH(k)) k++;
303 }
304 l = k - 1;
305 if (l >= nblock)
306 break;
307 while (!ISSET_BH(k) && UNALIGNED_BH(k))
308 k++;
309 if (!ISSET_BH(k)) {
310 while (WORD_BH(k) == 0x00000000) k += 32;
311 while (!ISSET_BH(k)) k++;
312 }
313 r = k - 1;
314 if (r >= nblock)
315 break;
316
317
318 if (r > l) {
319 nNotDone += (r - l + 1);
320 fallbackQSort3(fmap, eclass, l, r);
321
322
323 cc = -1;
324 for (i = l; i <= r; i++) {
325 cc1 = eclass[fmap[i]];
326 if (cc != cc1) {
327 SET_BH(i);
328 cc = cc1;
329 };
330 }
331 }
332 }
333
334 H *= 2;
335 if (H > nblock || nNotDone == 0)
336 break;
337 }
338
339
340
341
342
343
344 j = 0;
345 for (i = 0; i < nblock; i++) {
346 while (ftabCopy[j] == 0)
347 j++;
348 ftabCopy[j]--;
349 eclass8[fmap[i]] = (uint8_t)j;
350 }
351 AssertH(j < 256, 1005);
352}
353
354#undef SET_BH
355#undef CLEAR_BH
356#undef ISSET_BH
357#undef WORD_BH
358#undef UNALIGNED_BH
359
360
361
362
363
364
365
366
367
368static
369NOINLINE
370int mainGtU(
371 uint32_t i1,
372 uint32_t i2,
373 uint8_t* block,
374 uint16_t* quadrant,
375 uint32_t nblock,
376 int32_t* budget)
377{
378 int32_t k;
379 uint8_t c1, c2;
380 uint16_t s1, s2;
381
382
383
384
385
386
387
388#if CONFIG_BZIP2_FEATURE_SPEED >= 1
389
390#define TIMES_8(code) \
391 code; code; code; code; \
392 code; code; code; code;
393#define TIMES_12(code) \
394 code; code; code; code; \
395 code; code; code; code; \
396 code; code; code; code;
397
398#else
399
400#define TIMES_8(code) \
401{ \
402 int nn = 8; \
403 do { \
404 code; \
405 } while (--nn); \
406}
407#define TIMES_12(code) \
408{ \
409 int nn = 12; \
410 do { \
411 code; \
412 } while (--nn); \
413}
414
415#endif
416
417 AssertD(i1 != i2, "mainGtU");
418 TIMES_12(
419 c1 = block[i1]; c2 = block[i2];
420 if (c1 != c2) return (c1 > c2);
421 i1++; i2++;
422 )
423
424 k = nblock + 8;
425
426 do {
427 TIMES_8(
428 c1 = block[i1]; c2 = block[i2];
429 if (c1 != c2) return (c1 > c2);
430 s1 = quadrant[i1]; s2 = quadrant[i2];
431 if (s1 != s2) return (s1 > s2);
432 i1++; i2++;
433 )
434
435 if (i1 >= nblock) i1 -= nblock;
436 if (i2 >= nblock) i2 -= nblock;
437
438 (*budget)--;
439 k -= 8;
440 } while (k >= 0);
441
442 return False;
443}
444#undef TIMES_8
445#undef TIMES_12
446
447
448
449
450
451
452
453
454static
455const int32_t incs[14] = {
456 1, 4, 13, 40, 121, 364, 1093, 3280,
457 9841, 29524, 88573, 265720,
458 797161, 2391484
459};
460
461static
462void mainSimpleSort(uint32_t* ptr,
463 uint8_t* block,
464 uint16_t* quadrant,
465 int32_t nblock,
466 int32_t lo,
467 int32_t hi,
468 int32_t d,
469 int32_t* budget)
470{
471 int32_t i, j, h, bigN, hp;
472 uint32_t v;
473
474 bigN = hi - lo + 1;
475 if (bigN < 2) return;
476
477 hp = 0;
478 while (incs[hp] < bigN) hp++;
479 hp--;
480
481 for (; hp >= 0; hp--) {
482 h = incs[hp];
483
484 i = lo + h;
485 while (1) {
486
487 if (i > hi) break;
488 v = ptr[i];
489 j = i;
490 while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) {
491 ptr[j] = ptr[j-h];
492 j = j - h;
493 if (j <= (lo + h - 1)) break;
494 }
495 ptr[j] = v;
496 i++;
497
498
499#if CONFIG_BZIP2_FEATURE_SPEED >= 3
500
501 if (i > hi) break;
502 v = ptr[i];
503 j = i;
504 while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) {
505 ptr[j] = ptr[j-h];
506 j = j - h;
507 if (j <= (lo + h - 1)) break;
508 }
509 ptr[j] = v;
510 i++;
511
512
513 if (i > hi) break;
514 v = ptr[i];
515 j = i;
516 while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) {
517 ptr[j] = ptr[j-h];
518 j = j - h;
519 if (j <= (lo + h - 1)) break;
520 }
521 ptr[j] = v;
522 i++;
523#endif
524 if (*budget < 0) return;
525 }
526 }
527}
528
529
530
531
532
533
534
535
536
537
538
539static
540ALWAYS_INLINE
541uint8_t mmed3(uint8_t a, uint8_t b, uint8_t c)
542{
543 uint8_t t;
544 if (a > b) {
545 t = a;
546 a = b;
547 b = t;
548 };
549
550 if (b > c) {
551 b = c;
552 if (a > b)
553 b = a;
554 }
555 return b;
556}
557
558#define mpush(lz,hz,dz) \
559{ \
560 stackLo[sp] = lz; \
561 stackHi[sp] = hz; \
562 stackD [sp] = dz; \
563 sp++; \
564}
565
566#define mpop(lz,hz,dz) \
567{ \
568 sp--; \
569 lz = stackLo[sp]; \
570 hz = stackHi[sp]; \
571 dz = stackD [sp]; \
572}
573
574#define mnextsize(az) (nextHi[az] - nextLo[az])
575
576#define mnextswap(az,bz) \
577{ \
578 int32_t tz; \
579 tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
580 tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
581 tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; \
582}
583
584#define MAIN_QSORT_SMALL_THRESH 20
585#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
586#define MAIN_QSORT_STACK_SIZE 100
587
588static
589void mainQSort3(uint32_t* ptr,
590 uint8_t* block,
591 uint16_t* quadrant,
592 int32_t nblock,
593 int32_t loSt,
594 int32_t hiSt,
595 int32_t dSt,
596 int32_t* budget)
597{
598 int32_t unLo, unHi, ltLo, gtHi, n, m, med;
599 int32_t sp, lo, hi, d;
600
601 int32_t stackLo[MAIN_QSORT_STACK_SIZE];
602 int32_t stackHi[MAIN_QSORT_STACK_SIZE];
603 int32_t stackD [MAIN_QSORT_STACK_SIZE];
604
605 int32_t nextLo[3];
606 int32_t nextHi[3];
607 int32_t nextD [3];
608
609 sp = 0;
610 mpush(loSt, hiSt, dSt);
611
612 while (sp > 0) {
613 AssertH(sp < MAIN_QSORT_STACK_SIZE - 2, 1001);
614
615 mpop(lo, hi, d);
616 if (hi - lo < MAIN_QSORT_SMALL_THRESH
617 || d > MAIN_QSORT_DEPTH_THRESH
618 ) {
619 mainSimpleSort(ptr, block, quadrant, nblock, lo, hi, d, budget);
620 if (*budget < 0)
621 return;
622 continue;
623 }
624 med = (int32_t) mmed3(block[ptr[lo ] + d],
625 block[ptr[hi ] + d],
626 block[ptr[(lo+hi) >> 1] + d]);
627
628 unLo = ltLo = lo;
629 unHi = gtHi = hi;
630
631 while (1) {
632 while (1) {
633 if (unLo > unHi)
634 break;
635 n = ((int32_t)block[ptr[unLo]+d]) - med;
636 if (n == 0) {
637 mswap(ptr[unLo], ptr[ltLo]);
638 ltLo++;
639 unLo++;
640 continue;
641 };
642 if (n > 0) break;
643 unLo++;
644 }
645 while (1) {
646 if (unLo > unHi)
647 break;
648 n = ((int32_t)block[ptr[unHi]+d]) - med;
649 if (n == 0) {
650 mswap(ptr[unHi], ptr[gtHi]);
651 gtHi--;
652 unHi--;
653 continue;
654 };
655 if (n < 0) break;
656 unHi--;
657 }
658 if (unLo > unHi)
659 break;
660 mswap(ptr[unLo], ptr[unHi]);
661 unLo++;
662 unHi--;
663 }
664
665 AssertD(unHi == unLo-1, "mainQSort3(2)");
666
667 if (gtHi < ltLo) {
668 mpush(lo, hi, d + 1);
669 continue;
670 }
671
672 n = mmin(ltLo-lo, unLo-ltLo); mvswap(ptr, lo, unLo-n, n);
673 m = mmin(hi-gtHi, gtHi-unHi); mvswap(ptr, unLo, hi-m+1, m);
674
675 n = lo + unLo - ltLo - 1;
676 m = hi - (gtHi - unHi) + 1;
677
678 nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
679 nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
680 nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
681
682 if (mnextsize(0) < mnextsize(1)) mnextswap(0, 1);
683 if (mnextsize(1) < mnextsize(2)) mnextswap(1, 2);
684 if (mnextsize(0) < mnextsize(1)) mnextswap(0, 1);
685
686 AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)");
687 AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)");
688
689 mpush(nextLo[0], nextHi[0], nextD[0]);
690 mpush(nextLo[1], nextHi[1], nextD[1]);
691 mpush(nextLo[2], nextHi[2], nextD[2]);
692 }
693}
694
695#undef mpush
696#undef mpop
697#undef mnextsize
698#undef mnextswap
699#undef MAIN_QSORT_SMALL_THRESH
700#undef MAIN_QSORT_DEPTH_THRESH
701#undef MAIN_QSORT_STACK_SIZE
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
720#define SETMASK (1 << 21)
721#define CLEARMASK (~(SETMASK))
722
723static NOINLINE
724void mainSort(EState* state,
725 uint32_t* ptr,
726 uint8_t* block,
727 uint16_t* quadrant,
728 uint32_t* ftab,
729 int32_t nblock,
730 int32_t* budget)
731{
732 int32_t i, j, k, ss, sb;
733 uint8_t c1;
734 int32_t numQSorted;
735 uint16_t s;
736 Bool bigDone[256];
737
738
739
740
741
742#define runningOrder (state->mainSort__runningOrder)
743#define copyStart (state->mainSort__copyStart)
744#define copyEnd (state->mainSort__copyEnd)
745
746
747
748 memset(ftab, 0, 65537 * sizeof(ftab[0]));
749
750 j = block[0] << 8;
751 i = nblock - 1;
752
753#if CONFIG_BZIP2_FEATURE_SPEED >= 2
754 for (; i >= 3; i -= 4) {
755 quadrant[i] = 0;
756 j = (j >> 8) | (((uint16_t)block[i]) << 8);
757 ftab[j]++;
758 quadrant[i-1] = 0;
759 j = (j >> 8) | (((uint16_t)block[i-1]) << 8);
760 ftab[j]++;
761 quadrant[i-2] = 0;
762 j = (j >> 8) | (((uint16_t)block[i-2]) << 8);
763 ftab[j]++;
764 quadrant[i-3] = 0;
765 j = (j >> 8) | (((uint16_t)block[i-3]) << 8);
766 ftab[j]++;
767 }
768#endif
769 for (; i >= 0; i--) {
770 quadrant[i] = 0;
771 j = (j >> 8) | (((uint16_t)block[i]) << 8);
772 ftab[j]++;
773 }
774
775
776 for (i = 0; i < BZ_N_OVERSHOOT; i++) {
777 block [nblock+i] = block[i];
778 quadrant[nblock+i] = 0;
779 }
780
781
782 j = ftab[0];
783 for (i = 1; i <= 65536; i++) {
784 j += ftab[i];
785 ftab[i] = j;
786 }
787
788 s = block[0] << 8;
789 i = nblock - 1;
790#if CONFIG_BZIP2_FEATURE_SPEED >= 2
791 for (; i >= 3; i -= 4) {
792 s = (s >> 8) | (block[i] << 8);
793 j = ftab[s] - 1;
794 ftab[s] = j;
795 ptr[j] = i;
796 s = (s >> 8) | (block[i-1] << 8);
797 j = ftab[s] - 1;
798 ftab[s] = j;
799 ptr[j] = i-1;
800 s = (s >> 8) | (block[i-2] << 8);
801 j = ftab[s] - 1;
802 ftab[s] = j;
803 ptr[j] = i-2;
804 s = (s >> 8) | (block[i-3] << 8);
805 j = ftab[s] - 1;
806 ftab[s] = j;
807 ptr[j] = i-3;
808 }
809#endif
810 for (; i >= 0; i--) {
811 s = (s >> 8) | (block[i] << 8);
812 j = ftab[s] - 1;
813 ftab[s] = j;
814 ptr[j] = i;
815 }
816
817
818
819
820
821
822 for (i = 0; i <= 255; i++) {
823 bigDone [i] = False;
824 runningOrder[i] = i;
825 }
826
827 {
828 int32_t vv;
829
830
831 uint32_t h = 364;
832
833 do {
834
835 h = (h * 171) >> 9;
836 for (i = h; i <= 255; i++) {
837 vv = runningOrder[i];
838 j = i;
839 while (BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv)) {
840 runningOrder[j] = runningOrder[j-h];
841 j = j - h;
842 if (j <= (h - 1))
843 goto zero;
844 }
845 zero:
846 runningOrder[j] = vv;
847 }
848 } while (h != 1);
849 }
850
851
852
853
854
855 numQSorted = 0;
856
857 for (i = 0; i <= 255; i++) {
858
859
860
861
862
863
864
865 ss = runningOrder[i];
866
867
868
869
870
871
872
873
874
875 for (j = 0; j <= 255; j++) {
876 if (j != ss) {
877 sb = (ss << 8) + j;
878 if (!(ftab[sb] & SETMASK)) {
879 int32_t lo = ftab[sb] & CLEARMASK;
880 int32_t hi = (ftab[sb+1] & CLEARMASK) - 1;
881 if (hi > lo) {
882 mainQSort3(
883 ptr, block, quadrant, nblock,
884 lo, hi, BZ_N_RADIX, budget
885 );
886 if (*budget < 0) return;
887 numQSorted += (hi - lo + 1);
888 }
889 }
890 ftab[sb] |= SETMASK;
891 }
892 }
893
894 AssertH(!bigDone[ss], 1006);
895
896
897
898
899
900
901
902
903 {
904 for (j = 0; j <= 255; j++) {
905 copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
906 copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
907 }
908 for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
909 k = ptr[j] - 1;
910 if (k < 0)
911 k += nblock;
912 c1 = block[k];
913 if (!bigDone[c1])
914 ptr[copyStart[c1]++] = k;
915 }
916 for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
917 k = ptr[j]-1;
918 if (k < 0)
919 k += nblock;
920 c1 = block[k];
921 if (!bigDone[c1])
922 ptr[copyEnd[c1]--] = k;
923 }
924 }
925
926
927
928
929
930 AssertH((copyStart[ss]-1 == copyEnd[ss]) \
931 || (copyStart[ss] == 0 && copyEnd[ss] == nblock-1), 1007);
932
933 for (j = 0; j <= 255; j++)
934 ftab[(j << 8) + ss] |= SETMASK;
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975 bigDone[ss] = True;
976
977 if (i < 255) {
978 int32_t bbStart = ftab[ss << 8] & CLEARMASK;
979 int32_t bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
980 int32_t shifts = 0;
981
982 while ((bbSize >> shifts) > 65534) shifts++;
983
984 for (j = bbSize-1; j >= 0; j--) {
985 int32_t a2update = ptr[bbStart + j];
986 uint16_t qVal = (uint16_t)(j >> shifts);
987 quadrant[a2update] = qVal;
988 if (a2update < BZ_N_OVERSHOOT)
989 quadrant[a2update + nblock] = qVal;
990 }
991 AssertH(((bbSize-1) >> shifts) <= 65535, 1002);
992 }
993 }
994#undef runningOrder
995#undef copyStart
996#undef copyEnd
997}
998
999#undef BIGFREQ
1000#undef SETMASK
1001#undef CLEARMASK
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017static NOINLINE
1018void BZ2_blockSort(EState* s)
1019{
1020
1021
1022 enum { wfact = 30 };
1023
1024 uint32_t* ptr = s->ptr;
1025 uint8_t* block = s->block;
1026 uint32_t* ftab = s->ftab;
1027 int32_t nblock = s->nblock;
1028 uint16_t* quadrant;
1029 int32_t budget;
1030 int32_t i;
1031
1032 if (nblock < 10000) {
1033 fallbackSort(s->arr1, s->arr2, ftab, nblock);
1034 } else {
1035
1036
1037
1038
1039
1040 i = nblock + BZ_N_OVERSHOOT;
1041 if (i & 1) i++;
1042 quadrant = (uint16_t*)(&(block[i]));
1043
1044
1045
1046
1047
1048
1049
1050
1051 budget = nblock * ((wfact-1) / 3);
1052
1053 mainSort(s, ptr, block, quadrant, ftab, nblock, &budget);
1054 if (budget < 0) {
1055 fallbackSort(s->arr1, s->arr2, ftab, nblock);
1056 }
1057 }
1058
1059 s->origPtr = -1;
1060 for (i = 0; i < s->nblock; i++)
1061 if (ptr[i] == 0) {
1062 s->origPtr = i;
1063 break;
1064 };
1065
1066 AssertH(s->origPtr != -1, 1003);
1067}
1068
1069
1070
1071
1072
1073