001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied. See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019
020 /*
021 * This package is based on the work done by Keiron Liddle, Aftex Software
022 * <keiron@aftexsw.com> to whom the Ant project is very grateful for his
023 * great code.
024 */
025 package org.apache.commons.compress.compressors.bzip2;
026
027 import java.io.IOException;
028 import java.io.InputStream;
029
030 import org.apache.commons.compress.compressors.CompressorInputStream;
031
032 /**
033 * An input stream that decompresses from the BZip2 format to be read as any other stream.
034 *
035 * @NotThreadSafe
036 */
037 public class BZip2CompressorInputStream extends CompressorInputStream implements
038 BZip2Constants {
039
040 /**
041 * Index of the last char in the block, so the block size == last + 1.
042 */
043 private int last;
044
045 /**
046 * Index in zptr[] of original string after sorting.
047 */
048 private int origPtr;
049
050 /**
051 * always: in the range 0 .. 9. The current block size is 100000 * this
052 * number.
053 */
054 private int blockSize100k;
055
056 private boolean blockRandomised;
057
058 private int bsBuff;
059 private int bsLive;
060 private final CRC crc = new CRC();
061
062 private int nInUse;
063
064 private InputStream in;
065
066 private int currentChar = -1;
067
068 private static final int EOF = 0;
069 private static final int START_BLOCK_STATE = 1;
070 private static final int RAND_PART_A_STATE = 2;
071 private static final int RAND_PART_B_STATE = 3;
072 private static final int RAND_PART_C_STATE = 4;
073 private static final int NO_RAND_PART_A_STATE = 5;
074 private static final int NO_RAND_PART_B_STATE = 6;
075 private static final int NO_RAND_PART_C_STATE = 7;
076
077 private int currentState = START_BLOCK_STATE;
078
079 private int storedBlockCRC, storedCombinedCRC;
080 private int computedBlockCRC, computedCombinedCRC;
081
082 // Variables used by setup* methods exclusively
083
084 private int su_count;
085 private int su_ch2;
086 private int su_chPrev;
087 private int su_i2;
088 private int su_j2;
089 private int su_rNToGo;
090 private int su_rTPos;
091 private int su_tPos;
092 private char su_z;
093
094 /**
095 * All memory intensive stuff. This field is initialized by initBlock().
096 */
097 private BZip2CompressorInputStream.Data data;
098
/**
 * Creates a stream that decompresses the BZip2 data supplied by the given
 * stream.
 *
 * <p>The stream header and the first block are read right away, so
 * malformed input is reported by the constructor itself.</p>
 *
 * @param in
 *            the stream delivering the compressed bytes
 * @throws IOException
 *             if {@code in} is {@code null}, the stream content is
 *             malformed or an I/O error occurs.
 */
public BZip2CompressorInputStream(final InputStream in) throws IOException {
    this.in = in;
    init();
}
114
115 /** {@inheritDoc} */
116 public int read() throws IOException {
117 if (this.in != null) {
118 return read0();
119 } else {
120 throw new IOException("stream closed");
121 }
122 }
123
124 /*
125 * (non-Javadoc)
126 *
127 * @see java.io.InputStream#read(byte[], int, int)
128 */
129 public int read(final byte[] dest, final int offs, final int len)
130 throws IOException {
131 if (offs < 0) {
132 throw new IndexOutOfBoundsException("offs(" + offs + ") < 0.");
133 }
134 if (len < 0) {
135 throw new IndexOutOfBoundsException("len(" + len + ") < 0.");
136 }
137 if (offs + len > dest.length) {
138 throw new IndexOutOfBoundsException("offs(" + offs + ") + len("
139 + len + ") > dest.length(" + dest.length + ").");
140 }
141 if (this.in == null) {
142 throw new IOException("stream closed");
143 }
144
145 final int hi = offs + len;
146 int destOffs = offs;
147 for (int b; (destOffs < hi) && ((b = read0()) >= 0);) {
148 dest[destOffs++] = (byte) b;
149 }
150
151 return (destOffs == offs) ? -1 : (destOffs - offs);
152 }
153
154 private void makeMaps() {
155 final boolean[] inUse = this.data.inUse;
156 final byte[] seqToUnseq = this.data.seqToUnseq;
157
158 int nInUseShadow = 0;
159
160 for (int i = 0; i < 256; i++) {
161 if (inUse[i])
162 seqToUnseq[nInUseShadow++] = (byte) i;
163 }
164
165 this.nInUse = nInUseShadow;
166 }
167
168 private int read0() throws IOException {
169 final int retChar = this.currentChar;
170
171 switch (this.currentState) {
172 case EOF:
173 return -1;
174
175 case START_BLOCK_STATE:
176 throw new IllegalStateException();
177
178 case RAND_PART_A_STATE:
179 throw new IllegalStateException();
180
181 case RAND_PART_B_STATE:
182 setupRandPartB();
183 break;
184
185 case RAND_PART_C_STATE:
186 setupRandPartC();
187 break;
188
189 case NO_RAND_PART_A_STATE:
190 throw new IllegalStateException();
191
192 case NO_RAND_PART_B_STATE:
193 setupNoRandPartB();
194 break;
195
196 case NO_RAND_PART_C_STATE:
197 setupNoRandPartC();
198 break;
199
200 default:
201 throw new IllegalStateException();
202 }
203
204 return retChar;
205 }
206
207 private void init() throws IOException {
208 if (null == in) {
209 throw new IOException("No InputStream");
210 }
211 checkMagicChar('B', "first");
212 checkMagicChar('Z', "second");
213 checkMagicChar('h', "third");
214
215 int blockSize = this.in.read();
216 if ((blockSize < '1') || (blockSize > '9')) {
217 throw new IOException("Stream is not BZip2 formatted: illegal "
218 + "blocksize " + (char) blockSize);
219 }
220
221 this.blockSize100k = blockSize - '0';
222
223 initBlock();
224 setupBlock();
225 }
226
227 private void checkMagicChar(char expected, String position)
228 throws IOException {
229 int magic = this.in.read();
230 if (magic != expected) {
231 throw new IOException("Stream is not BZip2 formatted: expected '"
232 + expected + "' as " + position + " byte but got '"
233 + (char) magic + "'");
234 }
235 }
236
237 private void initBlock() throws IOException {
238 char magic0 = bsGetUByte();
239 char magic1 = bsGetUByte();
240 char magic2 = bsGetUByte();
241 char magic3 = bsGetUByte();
242 char magic4 = bsGetUByte();
243 char magic5 = bsGetUByte();
244
245 if (magic0 == 0x17 && magic1 == 0x72 && magic2 == 0x45
246 && magic3 == 0x38 && magic4 == 0x50 && magic5 == 0x90) {
247 complete(); // end of file
248 } else if (magic0 != 0x31 || // '1'
249 magic1 != 0x41 || // ')'
250 magic2 != 0x59 || // 'Y'
251 magic3 != 0x26 || // '&'
252 magic4 != 0x53 || // 'S'
253 magic5 != 0x59 // 'Y'
254 ) {
255 this.currentState = EOF;
256 throw new IOException("bad block header");
257 } else {
258 this.storedBlockCRC = bsGetInt();
259 this.blockRandomised = bsR(1) == 1;
260
261 /**
262 * Allocate data here instead in constructor, so we do not allocate
263 * it if the input file is empty.
264 */
265 if (this.data == null) {
266 this.data = new Data(this.blockSize100k);
267 }
268
269 // currBlockNo++;
270 getAndMoveToFrontDecode();
271
272 this.crc.initialiseCRC();
273 this.currentState = START_BLOCK_STATE;
274 }
275 }
276
277 private void endBlock() throws IOException {
278 this.computedBlockCRC = this.crc.getFinalCRC();
279
280 // A bad CRC is considered a fatal error.
281 if (this.storedBlockCRC != this.computedBlockCRC) {
282 // make next blocks readable without error
283 // (repair feature, not yet documented, not tested)
284 this.computedCombinedCRC = (this.storedCombinedCRC << 1)
285 | (this.storedCombinedCRC >>> 31);
286 this.computedCombinedCRC ^= this.storedBlockCRC;
287
288 throw new IOException("BZip2 CRC error");
289 }
290
291 this.computedCombinedCRC = (this.computedCombinedCRC << 1)
292 | (this.computedCombinedCRC >>> 31);
293 this.computedCombinedCRC ^= this.computedBlockCRC;
294 }
295
296 private void complete() throws IOException {
297 this.storedCombinedCRC = bsGetInt();
298 this.currentState = EOF;
299 this.data = null;
300
301 if (this.storedCombinedCRC != this.computedCombinedCRC) {
302 throw new IOException("BZip2 CRC error");
303 }
304 }
305
306 public void close() throws IOException {
307 InputStream inShadow = this.in;
308 if (inShadow != null) {
309 try {
310 if (inShadow != System.in) {
311 inShadow.close();
312 }
313 } finally {
314 this.data = null;
315 this.in = null;
316 }
317 }
318 }
319
320 private int bsR(final int n) throws IOException {
321 int bsLiveShadow = this.bsLive;
322 int bsBuffShadow = this.bsBuff;
323
324 if (bsLiveShadow < n) {
325 final InputStream inShadow = this.in;
326 do {
327 int thech = inShadow.read();
328
329 if (thech < 0) {
330 throw new IOException("unexpected end of stream");
331 }
332
333 bsBuffShadow = (bsBuffShadow << 8) | thech;
334 bsLiveShadow += 8;
335 } while (bsLiveShadow < n);
336
337 this.bsBuff = bsBuffShadow;
338 }
339
340 this.bsLive = bsLiveShadow - n;
341 return (bsBuffShadow >> (bsLiveShadow - n)) & ((1 << n) - 1);
342 }
343
344 private boolean bsGetBit() throws IOException {
345 int bsLiveShadow = this.bsLive;
346 int bsBuffShadow = this.bsBuff;
347
348 if (bsLiveShadow < 1) {
349 int thech = this.in.read();
350
351 if (thech < 0) {
352 throw new IOException("unexpected end of stream");
353 }
354
355 bsBuffShadow = (bsBuffShadow << 8) | thech;
356 bsLiveShadow += 8;
357 this.bsBuff = bsBuffShadow;
358 }
359
360 this.bsLive = bsLiveShadow - 1;
361 return ((bsBuffShadow >> (bsLiveShadow - 1)) & 1) != 0;
362 }
363
364 private char bsGetUByte() throws IOException {
365 return (char) bsR(8);
366 }
367
368 private int bsGetInt() throws IOException {
369 return (((((bsR(8) << 8) | bsR(8)) << 8) | bsR(8)) << 8) | bsR(8);
370 }
371
372 /**
373 * Called by createHuffmanDecodingTables() exclusively.
374 */
375 private static void hbCreateDecodeTables(final int[] limit,
376 final int[] base, final int[] perm, final char[] length,
377 final int minLen, final int maxLen, final int alphaSize) {
378 for (int i = minLen, pp = 0; i <= maxLen; i++) {
379 for (int j = 0; j < alphaSize; j++) {
380 if (length[j] == i) {
381 perm[pp++] = j;
382 }
383 }
384 }
385
386 for (int i = MAX_CODE_LEN; --i > 0;) {
387 base[i] = 0;
388 limit[i] = 0;
389 }
390
391 for (int i = 0; i < alphaSize; i++) {
392 base[length[i] + 1]++;
393 }
394
395 for (int i = 1, b = base[0]; i < MAX_CODE_LEN; i++) {
396 b += base[i];
397 base[i] = b;
398 }
399
400 for (int i = minLen, vec = 0, b = base[i]; i <= maxLen; i++) {
401 final int nb = base[i + 1];
402 vec += nb - b;
403 b = nb;
404 limit[i] = vec - 1;
405 vec <<= 1;
406 }
407
408 for (int i = minLen + 1; i <= maxLen; i++) {
409 base[i] = ((limit[i - 1] + 1) << 1) - base[i];
410 }
411 }
412
413 private void recvDecodingTables() throws IOException {
414 final Data dataShadow = this.data;
415 final boolean[] inUse = dataShadow.inUse;
416 final byte[] pos = dataShadow.recvDecodingTables_pos;
417 final byte[] selector = dataShadow.selector;
418 final byte[] selectorMtf = dataShadow.selectorMtf;
419
420 int inUse16 = 0;
421
422 /* Receive the mapping table */
423 for (int i = 0; i < 16; i++) {
424 if (bsGetBit()) {
425 inUse16 |= 1 << i;
426 }
427 }
428
429 for (int i = 256; --i >= 0;) {
430 inUse[i] = false;
431 }
432
433 for (int i = 0; i < 16; i++) {
434 if ((inUse16 & (1 << i)) != 0) {
435 final int i16 = i << 4;
436 for (int j = 0; j < 16; j++) {
437 if (bsGetBit()) {
438 inUse[i16 + j] = true;
439 }
440 }
441 }
442 }
443
444 makeMaps();
445 final int alphaSize = this.nInUse + 2;
446
447 /* Now the selectors */
448 final int nGroups = bsR(3);
449 final int nSelectors = bsR(15);
450
451 for (int i = 0; i < nSelectors; i++) {
452 int j = 0;
453 while (bsGetBit()) {
454 j++;
455 }
456 selectorMtf[i] = (byte) j;
457 }
458
459 /* Undo the MTF values for the selectors. */
460 for (int v = nGroups; --v >= 0;) {
461 pos[v] = (byte) v;
462 }
463
464 for (int i = 0; i < nSelectors; i++) {
465 int v = selectorMtf[i] & 0xff;
466 final byte tmp = pos[v];
467 while (v > 0) {
468 // nearly all times v is zero, 4 in most other cases
469 pos[v] = pos[v - 1];
470 v--;
471 }
472 pos[0] = tmp;
473 selector[i] = tmp;
474 }
475
476 final char[][] len = dataShadow.temp_charArray2d;
477
478 /* Now the coding tables */
479 for (int t = 0; t < nGroups; t++) {
480 int curr = bsR(5);
481 final char[] len_t = len[t];
482 for (int i = 0; i < alphaSize; i++) {
483 while (bsGetBit()) {
484 curr += bsGetBit() ? -1 : 1;
485 }
486 len_t[i] = (char) curr;
487 }
488 }
489
490 // finally create the Huffman tables
491 createHuffmanDecodingTables(alphaSize, nGroups);
492 }
493
494 /**
495 * Called by recvDecodingTables() exclusively.
496 */
497 private void createHuffmanDecodingTables(final int alphaSize,
498 final int nGroups) {
499 final Data dataShadow = this.data;
500 final char[][] len = dataShadow.temp_charArray2d;
501 final int[] minLens = dataShadow.minLens;
502 final int[][] limit = dataShadow.limit;
503 final int[][] base = dataShadow.base;
504 final int[][] perm = dataShadow.perm;
505
506 for (int t = 0; t < nGroups; t++) {
507 int minLen = 32;
508 int maxLen = 0;
509 final char[] len_t = len[t];
510 for (int i = alphaSize; --i >= 0;) {
511 final char lent = len_t[i];
512 if (lent > maxLen) {
513 maxLen = lent;
514 }
515 if (lent < minLen) {
516 minLen = lent;
517 }
518 }
519 hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen,
520 maxLen, alphaSize);
521 minLens[t] = minLen;
522 }
523 }
524
525 private void getAndMoveToFrontDecode() throws IOException {
526 this.origPtr = bsR(24);
527 recvDecodingTables();
528
529 final InputStream inShadow = this.in;
530 final Data dataShadow = this.data;
531 final byte[] ll8 = dataShadow.ll8;
532 final int[] unzftab = dataShadow.unzftab;
533 final byte[] selector = dataShadow.selector;
534 final byte[] seqToUnseq = dataShadow.seqToUnseq;
535 final char[] yy = dataShadow.getAndMoveToFrontDecode_yy;
536 final int[] minLens = dataShadow.minLens;
537 final int[][] limit = dataShadow.limit;
538 final int[][] base = dataShadow.base;
539 final int[][] perm = dataShadow.perm;
540 final int limitLast = this.blockSize100k * 100000;
541
542 /*
543 * Setting up the unzftab entries here is not strictly necessary, but it
544 * does save having to do it later in a separate pass, and so saves a
545 * block's worth of cache misses.
546 */
547 for (int i = 256; --i >= 0;) {
548 yy[i] = (char) i;
549 unzftab[i] = 0;
550 }
551
552 int groupNo = 0;
553 int groupPos = G_SIZE - 1;
554 final int eob = this.nInUse + 1;
555 int nextSym = getAndMoveToFrontDecode0(0);
556 int bsBuffShadow = this.bsBuff;
557 int bsLiveShadow = this.bsLive;
558 int lastShadow = -1;
559 int zt = selector[groupNo] & 0xff;
560 int[] base_zt = base[zt];
561 int[] limit_zt = limit[zt];
562 int[] perm_zt = perm[zt];
563 int minLens_zt = minLens[zt];
564
565 while (nextSym != eob) {
566 if ((nextSym == RUNA) || (nextSym == RUNB)) {
567 int s = -1;
568
569 for (int n = 1; true; n <<= 1) {
570 if (nextSym == RUNA) {
571 s += n;
572 } else if (nextSym == RUNB) {
573 s += n << 1;
574 } else {
575 break;
576 }
577
578 if (groupPos == 0) {
579 groupPos = G_SIZE - 1;
580 zt = selector[++groupNo] & 0xff;
581 base_zt = base[zt];
582 limit_zt = limit[zt];
583 perm_zt = perm[zt];
584 minLens_zt = minLens[zt];
585 } else {
586 groupPos--;
587 }
588
589 int zn = minLens_zt;
590
591 // Inlined:
592 // int zvec = bsR(zn);
593 while (bsLiveShadow < zn) {
594 final int thech = inShadow.read();
595 if (thech >= 0) {
596 bsBuffShadow = (bsBuffShadow << 8) | thech;
597 bsLiveShadow += 8;
598 continue;
599 } else {
600 throw new IOException("unexpected end of stream");
601 }
602 }
603 int zvec = (bsBuffShadow >> (bsLiveShadow - zn))
604 & ((1 << zn) - 1);
605 bsLiveShadow -= zn;
606
607 while (zvec > limit_zt[zn]) {
608 zn++;
609 while (bsLiveShadow < 1) {
610 final int thech = inShadow.read();
611 if (thech >= 0) {
612 bsBuffShadow = (bsBuffShadow << 8) | thech;
613 bsLiveShadow += 8;
614 continue;
615 } else {
616 throw new IOException(
617 "unexpected end of stream");
618 }
619 }
620 bsLiveShadow--;
621 zvec = (zvec << 1)
622 | ((bsBuffShadow >> bsLiveShadow) & 1);
623 }
624 nextSym = perm_zt[zvec - base_zt[zn]];
625 }
626
627 final byte ch = seqToUnseq[yy[0]];
628 unzftab[ch & 0xff] += s + 1;
629
630 while (s-- >= 0) {
631 ll8[++lastShadow] = ch;
632 }
633
634 if (lastShadow >= limitLast) {
635 throw new IOException("block overrun");
636 }
637 } else {
638 if (++lastShadow >= limitLast) {
639 throw new IOException("block overrun");
640 }
641
642 final char tmp = yy[nextSym - 1];
643 unzftab[seqToUnseq[tmp] & 0xff]++;
644 ll8[lastShadow] = seqToUnseq[tmp];
645
646 /*
647 * This loop is hammered during decompression, hence avoid
648 * native method call overhead of System.arraycopy for very
649 * small ranges to copy.
650 */
651 if (nextSym <= 16) {
652 for (int j = nextSym - 1; j > 0;) {
653 yy[j] = yy[--j];
654 }
655 } else {
656 System.arraycopy(yy, 0, yy, 1, nextSym - 1);
657 }
658
659 yy[0] = tmp;
660
661 if (groupPos == 0) {
662 groupPos = G_SIZE - 1;
663 zt = selector[++groupNo] & 0xff;
664 base_zt = base[zt];
665 limit_zt = limit[zt];
666 perm_zt = perm[zt];
667 minLens_zt = minLens[zt];
668 } else {
669 groupPos--;
670 }
671
672 int zn = minLens_zt;
673
674 // Inlined:
675 // int zvec = bsR(zn);
676 while (bsLiveShadow < zn) {
677 final int thech = inShadow.read();
678 if (thech >= 0) {
679 bsBuffShadow = (bsBuffShadow << 8) | thech;
680 bsLiveShadow += 8;
681 continue;
682 } else {
683 throw new IOException("unexpected end of stream");
684 }
685 }
686 int zvec = (bsBuffShadow >> (bsLiveShadow - zn))
687 & ((1 << zn) - 1);
688 bsLiveShadow -= zn;
689
690 while (zvec > limit_zt[zn]) {
691 zn++;
692 while (bsLiveShadow < 1) {
693 final int thech = inShadow.read();
694 if (thech >= 0) {
695 bsBuffShadow = (bsBuffShadow << 8) | thech;
696 bsLiveShadow += 8;
697 continue;
698 } else {
699 throw new IOException("unexpected end of stream");
700 }
701 }
702 bsLiveShadow--;
703 zvec = (zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1);
704 }
705 nextSym = perm_zt[zvec - base_zt[zn]];
706 }
707 }
708
709 this.last = lastShadow;
710 this.bsLive = bsLiveShadow;
711 this.bsBuff = bsBuffShadow;
712 }
713
714 private int getAndMoveToFrontDecode0(final int groupNo) throws IOException {
715 final InputStream inShadow = this.in;
716 final Data dataShadow = this.data;
717 final int zt = dataShadow.selector[groupNo] & 0xff;
718 final int[] limit_zt = dataShadow.limit[zt];
719 int zn = dataShadow.minLens[zt];
720 int zvec = bsR(zn);
721 int bsLiveShadow = this.bsLive;
722 int bsBuffShadow = this.bsBuff;
723
724 while (zvec > limit_zt[zn]) {
725 zn++;
726 while (bsLiveShadow < 1) {
727 final int thech = inShadow.read();
728
729 if (thech >= 0) {
730 bsBuffShadow = (bsBuffShadow << 8) | thech;
731 bsLiveShadow += 8;
732 continue;
733 } else {
734 throw new IOException("unexpected end of stream");
735 }
736 }
737 bsLiveShadow--;
738 zvec = (zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1);
739 }
740
741 this.bsLive = bsLiveShadow;
742 this.bsBuff = bsBuffShadow;
743
744 return dataShadow.perm[zt][zvec - dataShadow.base[zt][zn]];
745 }
746
747 private void setupBlock() throws IOException {
748 if (this.data == null) {
749 return;
750 }
751
752 final int[] cftab = this.data.cftab;
753 final int[] tt = this.data.initTT(this.last + 1);
754 final byte[] ll8 = this.data.ll8;
755 cftab[0] = 0;
756 System.arraycopy(this.data.unzftab, 0, cftab, 1, 256);
757
758 for (int i = 1, c = cftab[0]; i <= 256; i++) {
759 c += cftab[i];
760 cftab[i] = c;
761 }
762
763 for (int i = 0, lastShadow = this.last; i <= lastShadow; i++) {
764 tt[cftab[ll8[i] & 0xff]++] = i;
765 }
766
767 if ((this.origPtr < 0) || (this.origPtr >= tt.length)) {
768 throw new IOException("stream corrupted");
769 }
770
771 this.su_tPos = tt[this.origPtr];
772 this.su_count = 0;
773 this.su_i2 = 0;
774 this.su_ch2 = 256; /* not a char and not EOF */
775
776 if (this.blockRandomised) {
777 this.su_rNToGo = 0;
778 this.su_rTPos = 0;
779 setupRandPartA();
780 } else {
781 setupNoRandPartA();
782 }
783 }
784
785 private void setupRandPartA() throws IOException {
786 if (this.su_i2 <= this.last) {
787 this.su_chPrev = this.su_ch2;
788 int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff;
789 this.su_tPos = this.data.tt[this.su_tPos];
790 if (this.su_rNToGo == 0) {
791 this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1;
792 if (++this.su_rTPos == 512) {
793 this.su_rTPos = 0;
794 }
795 } else {
796 this.su_rNToGo--;
797 }
798 this.su_ch2 = su_ch2Shadow ^= (this.su_rNToGo == 1) ? 1 : 0;
799 this.su_i2++;
800 this.currentChar = su_ch2Shadow;
801 this.currentState = RAND_PART_B_STATE;
802 this.crc.updateCRC(su_ch2Shadow);
803 } else {
804 endBlock();
805 initBlock();
806 setupBlock();
807 }
808 }
809
810 private void setupNoRandPartA() throws IOException {
811 if (this.su_i2 <= this.last) {
812 this.su_chPrev = this.su_ch2;
813 int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff;
814 this.su_ch2 = su_ch2Shadow;
815 this.su_tPos = this.data.tt[this.su_tPos];
816 this.su_i2++;
817 this.currentChar = su_ch2Shadow;
818 this.currentState = NO_RAND_PART_B_STATE;
819 this.crc.updateCRC(su_ch2Shadow);
820 } else {
821 this.currentState = NO_RAND_PART_A_STATE;
822 endBlock();
823 initBlock();
824 setupBlock();
825 }
826 }
827
828 private void setupRandPartB() throws IOException {
829 if (this.su_ch2 != this.su_chPrev) {
830 this.currentState = RAND_PART_A_STATE;
831 this.su_count = 1;
832 setupRandPartA();
833 } else if (++this.su_count >= 4) {
834 this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff);
835 this.su_tPos = this.data.tt[this.su_tPos];
836 if (this.su_rNToGo == 0) {
837 this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1;
838 if (++this.su_rTPos == 512) {
839 this.su_rTPos = 0;
840 }
841 } else {
842 this.su_rNToGo--;
843 }
844 this.su_j2 = 0;
845 this.currentState = RAND_PART_C_STATE;
846 if (this.su_rNToGo == 1) {
847 this.su_z ^= 1;
848 }
849 setupRandPartC();
850 } else {
851 this.currentState = RAND_PART_A_STATE;
852 setupRandPartA();
853 }
854 }
855
856 private void setupRandPartC() throws IOException {
857 if (this.su_j2 < this.su_z) {
858 this.currentChar = this.su_ch2;
859 this.crc.updateCRC(this.su_ch2);
860 this.su_j2++;
861 } else {
862 this.currentState = RAND_PART_A_STATE;
863 this.su_i2++;
864 this.su_count = 0;
865 setupRandPartA();
866 }
867 }
868
869 private void setupNoRandPartB() throws IOException {
870 if (this.su_ch2 != this.su_chPrev) {
871 this.su_count = 1;
872 setupNoRandPartA();
873 } else if (++this.su_count >= 4) {
874 this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff);
875 this.su_tPos = this.data.tt[this.su_tPos];
876 this.su_j2 = 0;
877 setupNoRandPartC();
878 } else {
879 setupNoRandPartA();
880 }
881 }
882
883 private void setupNoRandPartC() throws IOException {
884 if (this.su_j2 < this.su_z) {
885 int su_ch2Shadow = this.su_ch2;
886 this.currentChar = su_ch2Shadow;
887 this.crc.updateCRC(su_ch2Shadow);
888 this.su_j2++;
889 this.currentState = NO_RAND_PART_C_STATE;
890 } else {
891 this.su_i2++;
892 this.su_count = 0;
893 setupNoRandPartA();
894 }
895 }
896
897 private static final class Data extends Object {
898
899 // (with blockSize 900k)
900 final boolean[] inUse = new boolean[256]; // 256 byte
901
902 final byte[] seqToUnseq = new byte[256]; // 256 byte
903 final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte
904 final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte
905
906 /**
907 * Freq table collected to save a pass over the data during
908 * decompression.
909 */
910 final int[] unzftab = new int[256]; // 1024 byte
911
912 final int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
913 final int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
914 final int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
915 final int[] minLens = new int[N_GROUPS]; // 24 byte
916
917 final int[] cftab = new int[257]; // 1028 byte
918 final char[] getAndMoveToFrontDecode_yy = new char[256]; // 512 byte
919 final char[][] temp_charArray2d = new char[N_GROUPS][MAX_ALPHA_SIZE]; // 3096
920 // byte
921 final byte[] recvDecodingTables_pos = new byte[N_GROUPS]; // 6 byte
922 // ---------------
923 // 60798 byte
924
925 int[] tt; // 3600000 byte
926 byte[] ll8; // 900000 byte
927
928 // ---------------
929 // 4560782 byte
930 // ===============
931
932 Data(int blockSize100k) {
933 super();
934
935 this.ll8 = new byte[blockSize100k * BZip2Constants.BASEBLOCKSIZE];
936 }
937
938 /**
939 * Initializes the {@link #tt} array.
940 *
941 * This method is called when the required length of the array is known.
942 * I don't initialize it at construction time to avoid unneccessary
943 * memory allocation when compressing small files.
944 */
945 int[] initTT(int length) {
946 int[] ttShadow = this.tt;
947
948 // tt.length should always be >= length, but theoretically
949 // it can happen, if the compressor mixed small and large
950 // blocks. Normally only the last block will be smaller
951 // than others.
952 if ((ttShadow == null) || (ttShadow.length < length)) {
953 this.tt = ttShadow = new int[length];
954 }
955
956 return ttShadow;
957 }
958
959 }
960
961 /**
962 * Checks if the signature matches what is expected for a bzip2 file.
963 *
964 * @param signature
965 * the bytes to check
966 * @param length
967 * the number of bytes to check
968 * @return true, if this stream is a bzip2 compressed stream, false otherwise
969 *
970 * @since Apache Commons Compress 1.1
971 */
972 public static boolean matches(byte[] signature, int length) {
973
974 if (length < 3) {
975 return false;
976 }
977
978 if (signature[0] != 'B') {
979 return false;
980 }
981
982 if (signature[1] != 'Z') {
983 return false;
984 }
985
986 if (signature[2] != 'h') {
987 return false;
988 }
989
990 return true;
991 }
992 }