1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69#include "bzlib_private.h"
70#include <compiler.h>
71
72
73
74
75
76
77void BZ2_bsInitWrite ( EState* s )
78{
79 s->bsLive = 0;
80 s->bsBuff = 0;
81}
82
83
84
85static
86void bsFinishWrite ( EState* s )
87{
88 while (s->bsLive > 0) {
89 s->zbits[s->numZ] = (UChar)(s->bsBuff >> 24);
90 s->numZ++;
91 s->bsBuff <<= 8;
92 s->bsLive -= 8;
93 }
94}
95
96
97
/*
 * bsNEEDW(nz): move every complete byte held in the bit accumulator to
 * the output buffer, leaving fewer than 8 live bits behind.
 *
 * Operates on a variable named `s` that must be in scope at the point
 * of use and provide the fields bsLive, bsBuff, zbits and numZ.
 * The `nz` argument is accepted but not used by the expansion.
 *
 * The body is wrapped in do { ... } while (0) so that the macro behaves
 * as a single statement (e.g. it can sit between `if` and `else`).
 */
#define bsNEEDW(nz)                           \
do {                                          \
   while (s->bsLive >= 8) {                   \
      s->zbits[s->numZ]                       \
         = (UChar)(s->bsBuff >> 24);          \
      s->numZ++;                              \
      s->bsBuff <<= 8;                        \
      s->bsLive -= 8;                         \
   }                                          \
} while (0)
108
109
110
111static
112__inline__
113void bsW ( EState* s, Int32 n, UInt32 v )
114{
115 bsNEEDW ( n );
116 s->bsBuff |= (v << (32 - s->bsLive - n));
117 s->bsLive += n;
118}
119
120
121
122static
123void bsPutUInt32 ( EState* s, UInt32 u )
124{
125 bsW ( s, 8, (u >> 24) & 0xffL );
126 bsW ( s, 8, (u >> 16) & 0xffL );
127 bsW ( s, 8, (u >> 8) & 0xffL );
128 bsW ( s, 8, u & 0xffL );
129}
130
131
132
133static
134void bsPutUChar ( EState* s, UChar c )
135{
136 bsW( s, 8, (UInt32)c );
137}
138
139
140
141
142
143
144
145static
146void makeMaps_e ( EState* s )
147{
148 Int32 i;
149 s->nInUse = 0;
150 for (i = 0; i < 256; i++)
151 if (s->inUse[i]) {
152 s->unseqToSeq[i] = s->nInUse;
153 s->nInUse++;
154 }
155}
156
157
158
159static
160void generateMTFValues ( EState* s )
161{
162 UChar yy[256];
163 Int32 i, j;
164 Int32 zPend;
165 Int32 wr;
166 Int32 EOB;
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190 UInt32* ptr = s->ptr;
191 UChar* block = s->block;
192 UInt16* mtfv = s->mtfv;
193
194 makeMaps_e ( s );
195 EOB = s->nInUse+1;
196
197 for (i = 0; i <= EOB; i++) s->mtfFreq[i] = 0;
198
199 wr = 0;
200 zPend = 0;
201 for (i = 0; i < s->nInUse; i++) yy[i] = (UChar) i;
202
203 for (i = 0; i < s->nblock; i++) {
204 UChar ll_i;
205 AssertD ( wr <= i, "generateMTFValues(1)" );
206 j = ptr[i]-1; if (j < 0) j += s->nblock;
207 ll_i = s->unseqToSeq[block[j]];
208 AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" );
209
210 if (yy[0] == ll_i) {
211 zPend++;
212 } else {
213
214 if (zPend > 0) {
215 zPend--;
216 while (True) {
217 if (zPend & 1) {
218 mtfv[wr] = BZ_RUNB; wr++;
219 s->mtfFreq[BZ_RUNB]++;
220 } else {
221 mtfv[wr] = BZ_RUNA; wr++;
222 s->mtfFreq[BZ_RUNA]++;
223 }
224 if (zPend < 2) break;
225 zPend = (zPend - 2) / 2;
226 };
227 zPend = 0;
228 }
229 {
230 register UChar rtmp;
231 register UChar* ryy_j;
232 register UChar rll_i;
233 rtmp = yy[1];
234 yy[1] = yy[0];
235 ryy_j = &(yy[1]);
236 rll_i = ll_i;
237 while ( rll_i != rtmp ) {
238 register UChar rtmp2;
239 ryy_j++;
240 rtmp2 = rtmp;
241 rtmp = *ryy_j;
242 *ryy_j = rtmp2;
243 };
244 yy[0] = rtmp;
245 j = ryy_j - &(yy[0]);
246 mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++;
247 }
248
249 }
250 }
251
252 if (zPend > 0) {
253 zPend--;
254 while (True) {
255 if (zPend & 1) {
256 mtfv[wr] = BZ_RUNB; wr++;
257 s->mtfFreq[BZ_RUNB]++;
258 } else {
259 mtfv[wr] = BZ_RUNA; wr++;
260 s->mtfFreq[BZ_RUNA]++;
261 }
262 if (zPend < 2) break;
263 zPend = (zPend - 2) / 2;
264 };
265 zPend = 0;
266 }
267
268 mtfv[wr] = EOB; wr++; s->mtfFreq[EOB]++;
269
270 s->nMTF = wr;
271}
272
273
274
275#define BZ_LESSER_ICOST 0
276#define BZ_GREATER_ICOST 15
277
278static
279void sendMTFValues ( EState* s )
280{
281 Int32 v, t, i, j, gs, ge, totc, bt, bc, iter;
282 Int32 nSelectors, alphaSize, minLen, maxLen, selCtr;
283 Int32 nGroups;
284 Int32 nBytes __maybe_unused;
285
286
287
288
289
290
291
292
293
294
295
296
297 UInt16 cost[BZ_N_GROUPS];
298 Int32 fave[BZ_N_GROUPS];
299
300 UInt16* mtfv = s->mtfv;
301
302 if (s->verbosity >= 3)
303 VPrintf3( " %d in block, %d after MTF & 1-2 coding, "
304 "%d+2 syms in use\n",
305 s->nblock, s->nMTF, s->nInUse );
306
307 alphaSize = s->nInUse+2;
308 for (t = 0; t < BZ_N_GROUPS; t++)
309 for (v = 0; v < alphaSize; v++)
310 s->len[t][v] = BZ_GREATER_ICOST;
311
312
313 AssertH ( s->nMTF > 0, 3001 );
314 if (s->nMTF < 200) nGroups = 2; else
315 if (s->nMTF < 600) nGroups = 3; else
316 if (s->nMTF < 1200) nGroups = 4; else
317 if (s->nMTF < 2400) nGroups = 5; else
318 nGroups = 6;
319
320
321 {
322 Int32 nPart, remF, tFreq, aFreq;
323
324 nPart = nGroups;
325 remF = s->nMTF;
326 gs = 0;
327 while (nPart > 0) {
328 tFreq = remF / nPart;
329 ge = gs-1;
330 aFreq = 0;
331 while (aFreq < tFreq && ge < alphaSize-1) {
332 ge++;
333 aFreq += s->mtfFreq[ge];
334 }
335
336 if (ge > gs
337 && nPart != nGroups && nPart != 1
338 && ((nGroups-nPart) % 2 == 1)) {
339 aFreq -= s->mtfFreq[ge];
340 ge--;
341 }
342
343 if (s->verbosity >= 3)
344 VPrintf5( " initial group %d, [%d .. %d], "
345 "has %d syms (%4.1f%%)\n",
346 nPart, gs, ge, aFreq,
347 (100.0 * (float)aFreq) / (float)(s->nMTF) );
348
349 for (v = 0; v < alphaSize; v++)
350 if (v >= gs && v <= ge)
351 s->len[nPart-1][v] = BZ_LESSER_ICOST; else
352 s->len[nPart-1][v] = BZ_GREATER_ICOST;
353
354 nPart--;
355 gs = ge+1;
356 remF -= aFreq;
357 }
358 }
359
360
361
362
363 for (iter = 0; iter < BZ_N_ITERS; iter++) {
364
365 for (t = 0; t < nGroups; t++) fave[t] = 0;
366
367 for (t = 0; t < nGroups; t++)
368 for (v = 0; v < alphaSize; v++)
369 s->rfreq[t][v] = 0;
370
371
372
373
374
375 if (nGroups == 6) {
376 for (v = 0; v < alphaSize; v++) {
377 s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v];
378 s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v];
379 s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v];
380 }
381 }
382
383 nSelectors = 0;
384 totc = 0;
385 gs = 0;
386 while (True) {
387
388
389 if (gs >= s->nMTF) break;
390 ge = gs + BZ_G_SIZE - 1;
391 if (ge >= s->nMTF) ge = s->nMTF-1;
392
393
394
395
396
397 for (t = 0; t < nGroups; t++) cost[t] = 0;
398
399 if (nGroups == 6 && 50 == ge-gs+1) {
400
401 register UInt32 cost01, cost23, cost45;
402 register UInt16 icv;
403 cost01 = cost23 = cost45 = 0;
404
405# define BZ_ITER(nn) \
406 icv = mtfv[gs+(nn)]; \
407 cost01 += s->len_pack[icv][0]; \
408 cost23 += s->len_pack[icv][1]; \
409 cost45 += s->len_pack[icv][2]; \
410
411 BZ_ITER(0); BZ_ITER(1); BZ_ITER(2); BZ_ITER(3); BZ_ITER(4);
412 BZ_ITER(5); BZ_ITER(6); BZ_ITER(7); BZ_ITER(8); BZ_ITER(9);
413 BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14);
414 BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19);
415 BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24);
416 BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29);
417 BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34);
418 BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39);
419 BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44);
420 BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49);
421
422# undef BZ_ITER
423
424 cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16;
425 cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16;
426 cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16;
427
428 } else {
429
430 for (i = gs; i <= ge; i++) {
431 UInt16 icv = mtfv[i];
432 for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv];
433 }
434 }
435
436
437
438
439
440 bc = 999999999; bt = -1;
441 for (t = 0; t < nGroups; t++)
442 if (cost[t] < bc) { bc = cost[t]; bt = t; };
443 totc += bc;
444 fave[bt]++;
445 s->selector[nSelectors] = bt;
446 nSelectors++;
447
448
449
450
451 if (nGroups == 6 && 50 == ge-gs+1) {
452
453
454# define BZ_ITUR(nn) s->rfreq[bt][ mtfv[gs+(nn)] ]++
455
456 BZ_ITUR(0); BZ_ITUR(1); BZ_ITUR(2); BZ_ITUR(3); BZ_ITUR(4);
457 BZ_ITUR(5); BZ_ITUR(6); BZ_ITUR(7); BZ_ITUR(8); BZ_ITUR(9);
458 BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14);
459 BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19);
460 BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24);
461 BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29);
462 BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34);
463 BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39);
464 BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44);
465 BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49);
466
467# undef BZ_ITUR
468
469 } else {
470
471 for (i = gs; i <= ge; i++)
472 s->rfreq[bt][ mtfv[i] ]++;
473 }
474
475 gs = ge+1;
476 }
477 if (s->verbosity >= 3) {
478 VPrintf2 ( " pass %d: size is %d, grp uses are ",
479 iter+1, totc/8 );
480 for (t = 0; t < nGroups; t++)
481 VPrintf1 ( "%d ", fave[t] );
482 VPrintf0 ( "\n" );
483 }
484
485
486
487
488
489
490 for (t = 0; t < nGroups; t++)
491 BZ2_hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]),
492 alphaSize, 17 );
493 }
494
495
496 AssertH( nGroups < 8, 3002 );
497 AssertH( nSelectors < 32768 &&
498 nSelectors <= (2 + (900000 / BZ_G_SIZE)),
499 3003 );
500
501
502
503 {
504 UChar pos[BZ_N_GROUPS], ll_i, tmp2, tmp;
505 for (i = 0; i < nGroups; i++) pos[i] = i;
506 for (i = 0; i < nSelectors; i++) {
507 ll_i = s->selector[i];
508 j = 0;
509 tmp = pos[j];
510 while ( ll_i != tmp ) {
511 j++;
512 tmp2 = tmp;
513 tmp = pos[j];
514 pos[j] = tmp2;
515 };
516 pos[0] = tmp;
517 s->selectorMtf[i] = j;
518 }
519 };
520
521
522 for (t = 0; t < nGroups; t++) {
523 minLen = 32;
524 maxLen = 0;
525 for (i = 0; i < alphaSize; i++) {
526 if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
527 if (s->len[t][i] < minLen) minLen = s->len[t][i];
528 }
529 AssertH ( !(maxLen > 17 ), 3004 );
530 AssertH ( !(minLen < 1), 3005 );
531 BZ2_hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]),
532 minLen, maxLen, alphaSize );
533 }
534
535
536 {
537 Bool inUse16[16];
538 for (i = 0; i < 16; i++) {
539 inUse16[i] = False;
540 for (j = 0; j < 16; j++)
541 if (s->inUse[i * 16 + j]) inUse16[i] = True;
542 }
543
544 nBytes = s->numZ;
545 for (i = 0; i < 16; i++)
546 if (inUse16[i]) bsW(s,1,1); else bsW(s,1,0);
547
548 for (i = 0; i < 16; i++)
549 if (inUse16[i])
550 for (j = 0; j < 16; j++) {
551 if (s->inUse[i * 16 + j]) bsW(s,1,1); else bsW(s,1,0);
552 }
553
554 if (s->verbosity >= 3)
555 VPrintf1( " bytes: mapping %d, ", s->numZ-nBytes );
556 }
557
558
559 nBytes = s->numZ;
560 bsW ( s, 3, nGroups );
561 bsW ( s, 15, nSelectors );
562 for (i = 0; i < nSelectors; i++) {
563 for (j = 0; j < s->selectorMtf[i]; j++) bsW(s,1,1);
564 bsW(s,1,0);
565 }
566 if (s->verbosity >= 3)
567 VPrintf1( "selectors %d, ", s->numZ-nBytes );
568
569
570 nBytes = s->numZ;
571
572 for (t = 0; t < nGroups; t++) {
573 Int32 curr = s->len[t][0];
574 bsW ( s, 5, curr );
575 for (i = 0; i < alphaSize; i++) {
576 while (curr < s->len[t][i]) { bsW(s,2,2); curr++; };
577 while (curr > s->len[t][i]) { bsW(s,2,3); curr--; };
578 bsW ( s, 1, 0 );
579 }
580 }
581
582 if (s->verbosity >= 3)
583 VPrintf1 ( "code lengths %d, ", s->numZ-nBytes );
584
585
586 nBytes = s->numZ;
587 selCtr = 0;
588 gs = 0;
589 while (True) {
590 if (gs >= s->nMTF) break;
591 ge = gs + BZ_G_SIZE - 1;
592 if (ge >= s->nMTF) ge = s->nMTF-1;
593 AssertH ( s->selector[selCtr] < nGroups, 3006 );
594
595 if (nGroups == 6 && 50 == ge-gs+1) {
596
597 UInt16 mtfv_i;
598 UChar* s_len_sel_selCtr
599 = &(s->len[s->selector[selCtr]][0]);
600 Int32* s_code_sel_selCtr
601 = &(s->code[s->selector[selCtr]][0]);
602
603# define BZ_ITAH(nn) \
604 mtfv_i = mtfv[gs+(nn)]; \
605 bsW ( s, \
606 s_len_sel_selCtr[mtfv_i], \
607 s_code_sel_selCtr[mtfv_i] )
608
609 BZ_ITAH(0); BZ_ITAH(1); BZ_ITAH(2); BZ_ITAH(3); BZ_ITAH(4);
610 BZ_ITAH(5); BZ_ITAH(6); BZ_ITAH(7); BZ_ITAH(8); BZ_ITAH(9);
611 BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14);
612 BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19);
613 BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24);
614 BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29);
615 BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34);
616 BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39);
617 BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44);
618 BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49);
619
620# undef BZ_ITAH
621
622 } else {
623
624 for (i = gs; i <= ge; i++) {
625 bsW ( s,
626 s->len [s->selector[selCtr]] [mtfv[i]],
627 s->code [s->selector[selCtr]] [mtfv[i]] );
628 }
629 }
630
631
632 gs = ge+1;
633 selCtr++;
634 }
635 AssertH( selCtr == nSelectors, 3007 );
636
637 if (s->verbosity >= 3)
638 VPrintf1( "codes %d\n", s->numZ-nBytes );
639}
640
641
642
643void BZ2_compressBlock ( EState* s, Bool is_last_block )
644{
645 if (s->nblock > 0) {
646
647 BZ_FINALISE_CRC ( s->blockCRC );
648 s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31);
649 s->combinedCRC ^= s->blockCRC;
650 if (s->blockNo > 1) s->numZ = 0;
651
652 if (s->verbosity >= 2)
653 VPrintf4( " block %d: crc = 0x%08x, "
654 "combined CRC = 0x%08x, size = %d\n",
655 s->blockNo, s->blockCRC, s->combinedCRC, s->nblock );
656
657 BZ2_blockSort ( s );
658 }
659
660 s->zbits = (UChar*) (&((UChar*)s->arr2)[s->nblock]);
661
662
663 if (s->blockNo == 1) {
664 BZ2_bsInitWrite ( s );
665 bsPutUChar ( s, BZ_HDR_B );
666 bsPutUChar ( s, BZ_HDR_Z );
667 bsPutUChar ( s, BZ_HDR_h );
668 bsPutUChar ( s, (UChar)(BZ_HDR_0 + s->blockSize100k) );
669 }
670
671 if (s->nblock > 0) {
672
673 bsPutUChar ( s, 0x31 ); bsPutUChar ( s, 0x41 );
674 bsPutUChar ( s, 0x59 ); bsPutUChar ( s, 0x26 );
675 bsPutUChar ( s, 0x53 ); bsPutUChar ( s, 0x59 );
676
677
678 bsPutUInt32 ( s, s->blockCRC );
679
680
681
682
683
684
685
686
687
688
689 bsW(s,1,0);
690
691 bsW ( s, 24, s->origPtr );
692 generateMTFValues ( s );
693 sendMTFValues ( s );
694 }
695
696
697
698 if (is_last_block) {
699
700 bsPutUChar ( s, 0x17 ); bsPutUChar ( s, 0x72 );
701 bsPutUChar ( s, 0x45 ); bsPutUChar ( s, 0x38 );
702 bsPutUChar ( s, 0x50 ); bsPutUChar ( s, 0x90 );
703 bsPutUInt32 ( s, s->combinedCRC );
704 if (s->verbosity >= 2)
705 VPrintf1( " final combined CRC = 0x%08x\n ", s->combinedCRC );
706 bsFinishWrite ( s );
707 }
708}
709
710
711
712
713
714