1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35#if BZIP2_SPEED >= 5
36# define ALWAYS_INLINE_5 ALWAYS_INLINE
37#else
38# define ALWAYS_INLINE_5
39#endif
40
41
42
43
44
45
46static
47void BZ2_bsInitWrite(EState* s)
48{
49 s->bsLive = 0;
50 s->bsBuff = 0;
51}
52
53
54
55static NOINLINE
56void bsFinishWrite(EState* s)
57{
58 while (s->bsLive > 0) {
59 *s->posZ++ = (uint8_t)(s->bsBuff >> 24);
60 s->bsBuff <<= 8;
61 s->bsLive -= 8;
62 }
63}
64
65
66
67static
68
69ALWAYS_INLINE_5
70void bsW(EState* s, int32_t n, uint32_t v)
71{
72 while (s->bsLive >= 8) {
73 *s->posZ++ = (uint8_t)(s->bsBuff >> 24);
74 s->bsBuff <<= 8;
75 s->bsLive -= 8;
76 }
77 s->bsBuff |= (v << (32 - s->bsLive - n));
78 s->bsLive += n;
79}
80
81static
82ALWAYS_INLINE_5
83void bsW16(EState* s, uint32_t v)
84{
85 while (s->bsLive >= 8) {
86 *s->posZ++ = (uint8_t)(s->bsBuff >> 24);
87 s->bsBuff <<= 8;
88 s->bsLive -= 8;
89 }
90 s->bsBuff |= (v << (16 - s->bsLive));
91 s->bsLive += 16;
92}
93
94static
95ALWAYS_INLINE
96void bsW1_1(EState* s)
97{
98
99 if (s->bsLive >= 8) {
100 *s->posZ++ = (uint8_t)(s->bsBuff >> 24);
101 s->bsBuff <<= 8;
102 s->bsLive -= 8;
103 }
104 s->bsBuff |= (1 << (31 - s->bsLive));
105 s->bsLive += 1;
106}
107static
108ALWAYS_INLINE_5
109void bsW1_0(EState* s)
110{
111
112 if (s->bsLive >= 8) {
113 *s->posZ++ = (uint8_t)(s->bsBuff >> 24);
114 s->bsBuff <<= 8;
115 s->bsLive -= 8;
116 }
117
118 s->bsLive += 1;
119}
120
121
122
123static ALWAYS_INLINE
124void bsPutU16(EState* s, unsigned u)
125{
126 bsW16(s, u);
127}
128
129
130
131static
132void bsPutU32(EState* s, unsigned u)
133{
134
135 bsW16(s, (u >> 16) & 0xffff);
136 bsW16(s, u & 0xffff);
137}
138
139
140
141
142
143
144
145static
146void makeMaps_e(EState* s)
147{
148 int i;
149 unsigned cnt = 0;
150 for (i = 0; i < 256; i++) {
151 if (s->inUse[i]) {
152 s->unseqToSeq[i] = cnt;
153 cnt++;
154 }
155 }
156 s->nInUse = cnt;
157}
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172static
173#if defined __i386__
174NOINLINE
175#endif
176int inner_loop(uint8_t *yy, uint8_t ll_i)
177{
178 register uint8_t rtmp;
179 register uint8_t* ryy_j;
180 rtmp = yy[1];
181 yy[1] = yy[0];
182 ryy_j = &(yy[1]);
183 while (ll_i != rtmp) {
184 register uint8_t rtmp2;
185 ryy_j++;
186 rtmp2 = rtmp;
187 rtmp = *ryy_j;
188 *ryy_j = rtmp2;
189 }
190 yy[0] = rtmp;
191 return ryy_j - &(yy[0]);
192}
193static NOINLINE
194void generateMTFValues(EState* s)
195{
196 uint8_t yy[256];
197 int i;
198 int zPend;
199 int32_t wr;
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222 uint32_t* ptr = s->ptr;
223
224 makeMaps_e(s);
225
226 wr = 0;
227 zPend = 0;
228 for (i = 0; i <= s->nInUse+1; i++)
229 s->mtfFreq[i] = 0;
230
231 for (i = 0; i < s->nInUse; i++)
232 yy[i] = (uint8_t) i;
233
234 for (i = 0; i < s->nblock; i++) {
235 uint8_t ll_i = ll_i;
236 int32_t j;
237
238 AssertD(wr <= i, "generateMTFValues(1)");
239 j = ptr[i] - 1;
240 if (j < 0)
241 j += s->nblock;
242 ll_i = s->unseqToSeq[s->block[j]];
243 AssertD(ll_i < s->nInUse, "generateMTFValues(2a)");
244
245 if (yy[0] == ll_i) {
246 zPend++;
247 continue;
248 }
249
250 if (zPend > 0) {
251 process_zPend:
252 zPend--;
253 while (1) {
254#if 0
255 if (zPend & 1) {
256 s->mtfv[wr] = BZ_RUNB; wr++;
257 s->mtfFreq[BZ_RUNB]++;
258 } else {
259 s->mtfv[wr] = BZ_RUNA; wr++;
260 s->mtfFreq[BZ_RUNA]++;
261 }
262#else
263 unsigned run = zPend & 1;
264 s->mtfv[wr] = run;
265 wr++;
266 s->mtfFreq[run]++;
267#endif
268 zPend -= 2;
269 if (zPend < 0)
270 break;
271 zPend = (unsigned)zPend / 2;
272
273 }
274 if (i < 0)
275 goto end;
276 zPend = 0;
277 }
278 j = inner_loop(yy, ll_i);
279 s->mtfv[wr] = j+1;
280 wr++;
281 s->mtfFreq[j+1]++;
282 }
283
284 i = -1;
285 if (zPend > 0)
286 goto process_zPend;
287 end:
288 s->mtfv[wr] = s->nInUse+1;
289 wr++;
290 s->mtfFreq[s->nInUse+1]++;
291
292 s->nMTF = wr;
293}
294
295
296
297#define BZ_LESSER_ICOST 0
298#define BZ_GREATER_ICOST 15
299
300static NOINLINE
301void sendMTFValues(EState* s)
302{
303 int32_t t, i;
304 unsigned iter;
305 unsigned gs;
306 int32_t alphaSize;
307 unsigned nSelectors, selCtr;
308 int32_t nGroups;
309
310
311
312
313
314
315
316
317
318
319#define code sendMTFValues__code
320#define rfreq sendMTFValues__rfreq
321#define len_pack sendMTFValues__len_pack
322
323 unsigned cost[BZ_N_GROUPS];
324
325 uint16_t* mtfv = s->mtfv;
326
327 alphaSize = s->nInUse + 2;
328 for (t = 0; t < BZ_N_GROUPS; t++) {
329 unsigned v;
330 for (v = 0; v < alphaSize; v++)
331 s->len[t][v] = BZ_GREATER_ICOST;
332 }
333
334
335 AssertH(s->nMTF > 0, 3001);
336
337
338
339
340
341 nGroups = 2;
342 nGroups += (s->nMTF >= 200);
343 nGroups += (s->nMTF >= 600);
344 nGroups += (s->nMTF >= 1200);
345 nGroups += (s->nMTF >= 2400);
346
347
348 {
349 unsigned nPart, remF;
350
351 nPart = nGroups;
352 remF = s->nMTF;
353 gs = 0;
354 while (nPart > 0) {
355 unsigned v;
356 unsigned ge;
357 unsigned tFreq, aFreq;
358
359 tFreq = remF / nPart;
360 ge = gs;
361 aFreq = 0;
362 while (aFreq < tFreq && ge < alphaSize) {
363 aFreq += s->mtfFreq[ge++];
364 }
365 ge--;
366
367 if (ge > gs
368 && nPart != nGroups && nPart != 1
369 && ((nGroups - nPart) % 2 == 1)
370 ) {
371 aFreq -= s->mtfFreq[ge];
372 ge--;
373 }
374
375 for (v = 0; v < alphaSize; v++)
376 if (v >= gs && v <= ge)
377 s->len[nPart-1][v] = BZ_LESSER_ICOST;
378 else
379 s->len[nPart-1][v] = BZ_GREATER_ICOST;
380
381 nPart--;
382 gs = ge + 1;
383 remF -= aFreq;
384 }
385 }
386
387
388
389
390 for (iter = 0; iter < BZ_N_ITERS; iter++) {
391 for (t = 0; t < nGroups; t++) {
392 unsigned v;
393 for (v = 0; v < alphaSize; v++)
394 s->rfreq[t][v] = 0;
395 }
396
397#if BZIP2_SPEED >= 5
398
399
400
401
402 if (nGroups == 6) {
403 unsigned v;
404 for (v = 0; v < alphaSize; v++) {
405 s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v];
406 s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v];
407 s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v];
408 }
409 }
410#endif
411 nSelectors = 0;
412 gs = 0;
413 while (1) {
414 unsigned ge;
415 unsigned bt, bc;
416
417
418 if (gs >= s->nMTF)
419 break;
420 ge = gs + BZ_G_SIZE - 1;
421 if (ge >= s->nMTF)
422 ge = s->nMTF-1;
423
424
425
426
427
428 for (t = 0; t < nGroups; t++)
429 cost[t] = 0;
430#if BZIP2_SPEED >= 5
431 if (nGroups == 6 && 50 == ge-gs+1) {
432
433 register uint32_t cost01, cost23, cost45;
434 register uint16_t icv;
435 cost01 = cost23 = cost45 = 0;
436#define BZ_ITER(nn) \
437 icv = mtfv[gs+(nn)]; \
438 cost01 += s->len_pack[icv][0]; \
439 cost23 += s->len_pack[icv][1]; \
440 cost45 += s->len_pack[icv][2];
441 BZ_ITER(0); BZ_ITER(1); BZ_ITER(2); BZ_ITER(3); BZ_ITER(4);
442 BZ_ITER(5); BZ_ITER(6); BZ_ITER(7); BZ_ITER(8); BZ_ITER(9);
443 BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14);
444 BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19);
445 BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24);
446 BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29);
447 BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34);
448 BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39);
449 BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44);
450 BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49);
451#undef BZ_ITER
452 cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16;
453 cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16;
454 cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16;
455 } else
456#endif
457 {
458
459 for (i = gs; i <= ge; i++) {
460 unsigned icv = mtfv[i];
461 for (t = 0; t < nGroups; t++)
462 cost[t] += s->len[t][icv];
463 }
464 }
465
466
467
468
469
470
471 bc = cost[0];
472 bt = 0;
473 for (t = 1 ; t < nGroups; t++) {
474 if (cost[t] < bc) {
475 bc = cost[t];
476 bt = t;
477 }
478 }
479 s->selector[nSelectors] = bt;
480 nSelectors++;
481
482
483
484
485
486#if BZIP2_SPEED >= 4
487 if (nGroups == 6 && 50 == ge-gs+1) {
488
489#define BZ_ITUR(nn) s->rfreq[bt][mtfv[gs + (nn)]]++
490 BZ_ITUR(0); BZ_ITUR(1); BZ_ITUR(2); BZ_ITUR(3); BZ_ITUR(4);
491 BZ_ITUR(5); BZ_ITUR(6); BZ_ITUR(7); BZ_ITUR(8); BZ_ITUR(9);
492 BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14);
493 BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19);
494 BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24);
495 BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29);
496 BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34);
497 BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39);
498 BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44);
499 BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49);
500#undef BZ_ITUR
501 gs = ge + 1;
502 } else
503#endif
504 {
505
506 while (gs <= ge) {
507 s->rfreq[bt][mtfv[gs]]++;
508 gs++;
509 }
510
511 }
512 }
513
514
515
516
517
518
519 for (t = 0; t < nGroups; t++)
520 BZ2_hbMakeCodeLengths(s, &(s->len[t][0]), &(s->rfreq[t][0]), alphaSize, 17 );
521 }
522
523 AssertH(nGroups < 8, 3002);
524 AssertH(nSelectors < 32768 && nSelectors <= (2 + (900000 / BZ_G_SIZE)), 3003);
525
526
527 {
528 uint8_t pos[BZ_N_GROUPS], ll_i, tmp2, tmp;
529
530 for (i = 0; i < nGroups; i++)
531 pos[i] = i;
532 for (i = 0; i < nSelectors; i++) {
533 unsigned j;
534 ll_i = s->selector[i];
535 j = 0;
536 tmp = pos[j];
537 while (ll_i != tmp) {
538 j++;
539 tmp2 = tmp;
540 tmp = pos[j];
541 pos[j] = tmp2;
542 }
543 pos[0] = tmp;
544 s->selectorMtf[i] = j;
545 }
546 }
547
548
549 for (t = 0; t < nGroups; t++) {
550 unsigned minLen = 32;
551 unsigned maxLen = 0;
552 for (i = 0; i < alphaSize; i++) {
553 if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
554 if (s->len[t][i] < minLen) minLen = s->len[t][i];
555 }
556 AssertH(!(maxLen > 17 ), 3004);
557 AssertH(!(minLen < 1), 3005);
558 BZ2_hbAssignCodes(&(s->code[t][0]), &(s->len[t][0]), minLen, maxLen, alphaSize);
559 }
560
561
562 {
563
564 int inUse16 = 0;
565 for (i = 0; i < 16; i++) {
566 if (sizeof(long) <= 4) {
567 inUse16 = inUse16*2 +
568 ((*(bb__aliased_uint32_t*)&(s->inUse[i * 16 + 0])
569 | *(bb__aliased_uint32_t*)&(s->inUse[i * 16 + 4])
570 | *(bb__aliased_uint32_t*)&(s->inUse[i * 16 + 8])
571 | *(bb__aliased_uint32_t*)&(s->inUse[i * 16 + 12])) != 0);
572 } else {
573 inUse16 = inUse16*2 +
574 ((*(bb__aliased_uint64_t*)&(s->inUse[i * 16 + 0])
575 | *(bb__aliased_uint64_t*)&(s->inUse[i * 16 + 8])) != 0);
576 }
577 }
578
579 bsW16(s, inUse16);
580
581 inUse16 <<= (sizeof(int)*8 - 16);
582 for (i = 0; i < 16; i++) {
583 if (inUse16 < 0) {
584 unsigned v16 = 0;
585 unsigned j;
586 for (j = 0; j < 16; j++)
587 v16 = v16*2 + s->inUse[i * 16 + j];
588 bsW16(s, v16);
589 }
590 inUse16 <<= 1;
591 }
592 }
593
594
595 bsW(s, 3, nGroups);
596 bsW(s, 15, nSelectors);
597 for (i = 0; i < nSelectors; i++) {
598 unsigned j;
599 for (j = 0; j < s->selectorMtf[i]; j++)
600 bsW1_1(s);
601 bsW1_0(s);
602 }
603
604
605 for (t = 0; t < nGroups; t++) {
606 unsigned curr = s->len[t][0];
607 bsW(s, 5, curr);
608 for (i = 0; i < alphaSize; i++) {
609 while (curr < s->len[t][i]) { bsW(s, 2, 2); curr++; }
610 while (curr > s->len[t][i]) { bsW(s, 2, 3); curr--; }
611 bsW1_0(s);
612 }
613 }
614
615
616 selCtr = 0;
617 gs = 0;
618 while (1) {
619 unsigned ge;
620
621 if (gs >= s->nMTF)
622 break;
623 ge = gs + BZ_G_SIZE - 1;
624 if (ge >= s->nMTF)
625 ge = s->nMTF-1;
626 AssertH(s->selector[selCtr] < nGroups, 3006);
627
628
629#if 0
630 if (nGroups == 6 && 50 == ge-gs+1) {
631
632 uint16_t mtfv_i;
633 uint8_t* s_len_sel_selCtr = &(s->len[s->selector[selCtr]][0]);
634 int32_t* s_code_sel_selCtr = &(s->code[s->selector[selCtr]][0]);
635#define BZ_ITAH(nn) \
636 mtfv_i = mtfv[gs+(nn)]; \
637 bsW(s, s_len_sel_selCtr[mtfv_i], s_code_sel_selCtr[mtfv_i])
638 BZ_ITAH(0); BZ_ITAH(1); BZ_ITAH(2); BZ_ITAH(3); BZ_ITAH(4);
639 BZ_ITAH(5); BZ_ITAH(6); BZ_ITAH(7); BZ_ITAH(8); BZ_ITAH(9);
640 BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14);
641 BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19);
642 BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24);
643 BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29);
644 BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34);
645 BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39);
646 BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44);
647 BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49);
648#undef BZ_ITAH
649 gs = ge+1;
650 } else
651#endif
652 {
653
654
655 uint8_t* s_len_sel_selCtr = &(s->len [s->selector[selCtr]][0]);
656 int32_t* s_code_sel_selCtr = &(s->code[s->selector[selCtr]][0]);
657 while (gs <= ge) {
658 bsW(s,
659 s_len_sel_selCtr[mtfv[gs]],
660 s_code_sel_selCtr[mtfv[gs]]
661 );
662 gs++;
663 }
664
665 }
666 selCtr++;
667 }
668 AssertH(selCtr == nSelectors, 3007);
669#undef code
670#undef rfreq
671#undef len_pack
672}
673
674
675
676static
677void BZ2_compressBlock(EState* s, int is_last_block)
678{
679 int32_t origPtr = origPtr;
680
681 if (s->nblock > 0) {
682 BZ_FINALISE_CRC(s->blockCRC);
683 s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31);
684 s->combinedCRC ^= s->blockCRC;
685 if (s->blockNo > 1)
686 s->posZ = s->zbits;
687
688 origPtr = BZ2_blockSort(s);
689 }
690
691 s->zbits = &((uint8_t*)s->arr2)[s->nblock];
692 s->posZ = s->zbits;
693 s->state_out_pos = s->zbits;
694
695
696 if (s->blockNo == 1) {
697 BZ2_bsInitWrite(s);
698
699
700
701
702 bsPutU32(s, BZ_HDR_BZh0 + s->blockSize100k);
703 }
704
705 if (s->nblock > 0) {
706
707
708
709
710 bsPutU32(s, 0x31415926);
711
712
713 bsPutU16(s, 0x5359);
714
715
716 bsPutU32(s, s->blockCRC);
717
718
719
720
721
722
723
724
725
726
727 bsW1_0(s);
728
729 bsW(s, 24, origPtr);
730 generateMTFValues(s);
731 sendMTFValues(s);
732 }
733
734
735 if (is_last_block) {
736
737
738
739
740 bsPutU32(s, 0x17724538);
741
742
743 bsPutU16(s, 0x5090);
744 bsPutU32(s, s->combinedCRC);
745 bsFinishWrite(s);
746 }
747}
748
749
750
751
752
753