001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019 package org.apache.commons.compress.compressors.bzip2; 020 021 import java.io.IOException; 022 import java.io.OutputStream; 023 024 import org.apache.commons.compress.compressors.CompressorOutputStream; 025 026 /** 027 * An output stream that compresses into the BZip2 format into another stream. 028 * 029 * <p> 030 * The compression requires large amounts of memory. Thus you should call the 031 * {@link #close() close()} method as soon as possible, to force 032 * <tt>BZip2CompressorOutputStream</tt> to release the allocated memory. 033 * </p> 034 * 035 * <p> You can shrink the amount of allocated memory and maybe raise 036 * the compression speed by choosing a lower blocksize, which in turn 037 * may cause a lower compression ratio. You can avoid unnecessary 038 * memory allocation by avoiding using a blocksize which is bigger 039 * than the size of the input. </p> 040 * 041 * <p> You can compute the memory usage for compressing by the 042 * following formula: </p> 043 * 044 * <pre> 045 * <code>400k + (9 * blocksize)</code>. 046 * </pre> 047 * 048 * <p> To get the memory required for decompression by {@link 049 * BZip2CompressorInputStream} use </p> 050 * 051 * <pre> 052 * <code>65k + (5 * blocksize)</code>. 053 * </pre> 054 * 055 * <table width="100%" border="1"> 056 * <colgroup> <col width="33%" /> <col width="33%" /> <col width="33%" /> 057 * </colgroup> 058 * <tr> 059 * <th colspan="3">Memory usage by blocksize</th> 060 * </tr> 061 * <tr> 062 * <th align="right">Blocksize</th> <th align="right">Compression<br> 063 * memory usage</th> <th align="right">Decompression<br> 064 * memory usage</th> 065 * </tr> 066 * <tr> 067 * <td align="right">100k</td> 068 * <td align="right">1300k</td> 069 * <td align="right">565k</td> 070 * </tr> 071 * <tr> 072 * <td align="right">200k</td> 073 * <td align="right">2200k</td> 074 * <td align="right">1065k</td> 075 * </tr> 076 * <tr> 077 * <td align="right">300k</td> 078 * <td align="right">3100k</td> 079 * <td align="right">1565k</td> 080 * </tr> 081 * <tr> 082 * <td align="right">400k</td> 083 * <td align="right">4000k</td> 084 * <td align="right">2065k</td> 085 * </tr> 086 * <tr> 087 * <td align="right">500k</td> 088 * <td align="right">4900k</td> 089 * <td align="right">2565k</td> 090 * </tr> 091 * <tr> 092 * <td align="right">600k</td> 093 * <td align="right">5800k</td> 094 * <td align="right">3065k</td> 095 * </tr> 096 * <tr> 097 * <td align="right">700k</td> 098 * <td align="right">6700k</td> 099 * <td align="right">3565k</td> 100 * </tr> 101 * <tr> 102 * <td align="right">800k</td> 103 * <td align="right">7600k</td> 104 * <td align="right">4065k</td> 105 * </tr> 106 * <tr> 107 * <td align="right">900k</td> 108 * <td align="right">8500k</td> 109 * <td align="right">4565k</td> 110 * </tr> 111 * </table> 112 * 113 * <p> 114 * For decompression <tt>BZip2CompressorInputStream</tt> allocates less memory if the 115 * bzipped input is smaller than one block. 116 * </p> 117 * 118 * <p> 119 * Instances of this class are not threadsafe. 120 * </p> 121 * 122 * <p> 123 * TODO: Update to BZip2 1.0.1 124 * </p> 125 * @NotThreadSafe 126 */ 127 public class BZip2CompressorOutputStream extends CompressorOutputStream 128 implements BZip2Constants { 129 130 /** 131 * The minimum supported blocksize <tt> == 1</tt>. 132 */ 133 public static final int MIN_BLOCKSIZE = 1; 134 135 /** 136 * The maximum supported blocksize <tt> == 9</tt>. 137 */ 138 public static final int MAX_BLOCKSIZE = 9; 139 140 private static final int GREATER_ICOST = 15; 141 private static final int LESSER_ICOST = 0; 142 143 private static void hbMakeCodeLengths(final byte[] len, final int[] freq, 144 final Data dat, final int alphaSize, 145 final int maxLen) { 146 /* 147 * Nodes and heap entries run from 1. Entry 0 for both the heap and 148 * nodes is a sentinel. 149 */ 150 final int[] heap = dat.heap; 151 final int[] weight = dat.weight; 152 final int[] parent = dat.parent; 153 154 for (int i = alphaSize; --i >= 0;) { 155 weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8; 156 } 157 158 for (boolean tooLong = true; tooLong;) { 159 tooLong = false; 160 161 int nNodes = alphaSize; 162 int nHeap = 0; 163 heap[0] = 0; 164 weight[0] = 0; 165 parent[0] = -2; 166 167 for (int i = 1; i <= alphaSize; i++) { 168 parent[i] = -1; 169 nHeap++; 170 heap[nHeap] = i; 171 172 int zz = nHeap; 173 int tmp = heap[zz]; 174 while (weight[tmp] < weight[heap[zz >> 1]]) { 175 heap[zz] = heap[zz >> 1]; 176 zz >>= 1; 177 } 178 heap[zz] = tmp; 179 } 180 181 while (nHeap > 1) { 182 int n1 = heap[1]; 183 heap[1] = heap[nHeap]; 184 nHeap--; 185 186 int yy = 0; 187 int zz = 1; 188 int tmp = heap[1]; 189 190 while (true) { 191 yy = zz << 1; 192 193 if (yy > nHeap) { 194 break; 195 } 196 197 if ((yy < nHeap) 198 && (weight[heap[yy + 1]] < weight[heap[yy]])) { 199 yy++; 200 } 201 202 if (weight[tmp] < weight[heap[yy]]) { 203 break; 204 } 205 206 heap[zz] = heap[yy]; 207 zz = yy; 208 } 209 210 heap[zz] = tmp; 211 212 int n2 = heap[1]; 213 heap[1] = heap[nHeap]; 214 nHeap--; 215 216 yy = 0; 217 zz = 1; 218 tmp = heap[1]; 219 220 while (true) { 221 yy = zz << 1; 222 223 if (yy > nHeap) { 224 break; 225 } 226 227 if ((yy < nHeap) 228 && (weight[heap[yy + 1]] < weight[heap[yy]])) { 229 yy++; 230 } 231 232 if (weight[tmp] < weight[heap[yy]]) { 233 break; 234 } 235 236 heap[zz] = heap[yy]; 237 zz = yy; 238 } 239 240 heap[zz] = tmp; 241 nNodes++; 242 parent[n1] = parent[n2] = nNodes; 243 244 final int weight_n1 = weight[n1]; 245 final int weight_n2 = weight[n2]; 246 weight[nNodes] = ((weight_n1 & 0xffffff00) 247 + (weight_n2 & 0xffffff00)) 248 | (1 + (((weight_n1 & 0x000000ff) 249 > (weight_n2 & 0x000000ff)) 250 ? (weight_n1 & 0x000000ff) 251 : (weight_n2 & 0x000000ff))); 252 253 parent[nNodes] = -1; 254 nHeap++; 255 heap[nHeap] = nNodes; 256 257 tmp = 0; 258 zz = nHeap; 259 tmp = heap[zz]; 260 final int weight_tmp = weight[tmp]; 261 while (weight_tmp < weight[heap[zz >> 1]]) { 262 heap[zz] = heap[zz >> 1]; 263 zz >>= 1; 264 } 265 heap[zz] = tmp; 266 267 } 268 269 for (int i = 1; i <= alphaSize; i++) { 270 int j = 0; 271 int k = i; 272 273 for (int parent_k; (parent_k = parent[k]) >= 0;) { 274 k = parent_k; 275 j++; 276 } 277 278 len[i - 1] = (byte) j; 279 if (j > maxLen) { 280 tooLong = true; 281 } 282 } 283 284 if (tooLong) { 285 for (int i = 1; i < alphaSize; i++) { 286 int j = weight[i] >> 8; 287 j = 1 + (j >> 1); 288 weight[i] = j << 8; 289 } 290 } 291 } 292 } 293 294 /** 295 * Index of the last char in the block, so the block size == last + 1. 296 */ 297 private int last; 298 299 /** 300 * Always: in the range 0 .. 9. The current block size is 100000 * this 301 * number. 302 */ 303 private final int blockSize100k; 304 305 private int bsBuff; 306 private int bsLive; 307 private final CRC crc = new CRC(); 308 309 private int nInUse; 310 311 private int nMTF; 312 313 private int currentChar = -1; 314 private int runLength = 0; 315 316 private int blockCRC; 317 private int combinedCRC; 318 private final int allowableBlockSize; 319 320 /** 321 * All memory intensive stuff. 322 */ 323 private Data data; 324 private BlockSort blockSorter; 325 326 private OutputStream out; 327 328 /** 329 * Chooses a blocksize based on the given length of the data to compress. 330 * 331 * @return The blocksize, between {@link #MIN_BLOCKSIZE} and 332 * {@link #MAX_BLOCKSIZE} both inclusive. For a negative 333 * <tt>inputLength</tt> this method returns <tt>MAX_BLOCKSIZE</tt> 334 * always. 335 * 336 * @param inputLength 337 * The length of the data which will be compressed by 338 * <tt>BZip2CompressorOutputStream</tt>. 339 */ 340 public static int chooseBlockSize(long inputLength) { 341 return (inputLength > 0) ? (int) Math 342 .min((inputLength / 132000) + 1, 9) : MAX_BLOCKSIZE; 343 } 344 345 /** 346 * Constructs a new <tt>BZip2CompressorOutputStream</tt> with a blocksize of 900k. 347 * 348 * @param out 349 * the destination stream. 350 * 351 * @throws IOException 352 * if an I/O error occurs in the specified stream. 353 * @throws NullPointerException 354 * if <code>out == null</code>. 355 */ 356 public BZip2CompressorOutputStream(final OutputStream out) 357 throws IOException { 358 this(out, MAX_BLOCKSIZE); 359 } 360 361 /** 362 * Constructs a new <tt>BZip2CompressorOutputStream</tt> with specified blocksize. 363 * 364 * @param out 365 * the destination stream. 366 * @param blockSize 367 * the blockSize as 100k units. 368 * 369 * @throws IOException 370 * if an I/O error occurs in the specified stream. 371 * @throws IllegalArgumentException 372 * if <code>(blockSize < 1) || (blockSize > 9)</code>. 373 * @throws NullPointerException 374 * if <code>out == null</code>. 375 * 376 * @see #MIN_BLOCKSIZE 377 * @see #MAX_BLOCKSIZE 378 */ 379 public BZip2CompressorOutputStream(final OutputStream out, 380 final int blockSize) 381 throws IOException { 382 super(); 383 384 if (blockSize < 1) { 385 throw new IllegalArgumentException("blockSize(" + blockSize 386 + ") < 1"); 387 } 388 if (blockSize > 9) { 389 throw new IllegalArgumentException("blockSize(" + blockSize 390 + ") > 9"); 391 } 392 393 this.blockSize100k = blockSize; 394 this.out = out; 395 396 /* 20 is just a paranoia constant */ 397 this.allowableBlockSize = (this.blockSize100k * BZip2Constants.BASEBLOCKSIZE) - 20; 398 init(); 399 } 400 401 /** {@inheritDoc} */ 402 @Override 403 public void write(final int b) throws IOException { 404 if (this.out != null) { 405 write0(b); 406 } else { 407 throw new IOException("closed"); 408 } 409 } 410 411 /** 412 * Writes the current byte to the buffer, run-length encoding it 413 * if it has been repeated at least four times (the first step 414 * RLEs sequences of four identical bytes). 415 * 416 * <p>Flushes the current block before writing data if it is 417 * full.</p> 418 * 419 * <p>"write to the buffer" means adding to data.buffer starting 420 * two steps "after" this.last - initially starting at index 1 421 * (not 0) - and updating this.last to point to the last index 422 * written minus 1.</p> 423 */ 424 private void writeRun() throws IOException { 425 final int lastShadow = this.last; 426 427 if (lastShadow < this.allowableBlockSize) { 428 final int currentCharShadow = this.currentChar; 429 final Data dataShadow = this.data; 430 dataShadow.inUse[currentCharShadow] = true; 431 final byte ch = (byte) currentCharShadow; 432 433 int runLengthShadow = this.runLength; 434 this.crc.updateCRC(currentCharShadow, runLengthShadow); 435 436 switch (runLengthShadow) { 437 case 1: 438 dataShadow.block[lastShadow + 2] = ch; 439 this.last = lastShadow + 1; 440 break; 441 442 case 2: 443 dataShadow.block[lastShadow + 2] = ch; 444 dataShadow.block[lastShadow + 3] = ch; 445 this.last = lastShadow + 2; 446 break; 447 448 case 3: { 449 final byte[] block = dataShadow.block; 450 block[lastShadow + 2] = ch; 451 block[lastShadow + 3] = ch; 452 block[lastShadow + 4] = ch; 453 this.last = lastShadow + 3; 454 } 455 break; 456 457 default: { 458 runLengthShadow -= 4; 459 dataShadow.inUse[runLengthShadow] = true; 460 final byte[] block = dataShadow.block; 461 block[lastShadow + 2] = ch; 462 block[lastShadow + 3] = ch; 463 block[lastShadow + 4] = ch; 464 block[lastShadow + 5] = ch; 465 block[lastShadow + 6] = (byte) runLengthShadow; 466 this.last = lastShadow + 5; 467 } 468 break; 469 470 } 471 } else { 472 endBlock(); 473 initBlock(); 474 writeRun(); 475 } 476 } 477 478 /** 479 * Overriden to close the stream. 480 */ 481 @Override 482 protected void finalize() throws Throwable { 483 finish(); 484 super.finalize(); 485 } 486 487 488 public void finish() throws IOException { 489 if (out != null) { 490 try { 491 if (this.runLength > 0) { 492 writeRun(); 493 } 494 this.currentChar = -1; 495 endBlock(); 496 endCompression(); 497 } finally { 498 this.out = null; 499 this.data = null; 500 this.blockSorter = null; 501 } 502 } 503 } 504 505 @Override 506 public void close() throws IOException { 507 if (out != null) { 508 OutputStream outShadow = this.out; 509 finish(); 510 outShadow.close(); 511 } 512 } 513 514 @Override 515 public void flush() throws IOException { 516 OutputStream outShadow = this.out; 517 if (outShadow != null) { 518 outShadow.flush(); 519 } 520 } 521 522 /** 523 * Writes magic bytes like BZ on the first position of the stream 524 * and bytes indiciating the file-format, which is 525 * huffmanised, followed by a digit indicating blockSize100k. 526 * @throws IOException if the magic bytes could not been written 527 */ 528 private void init() throws IOException { 529 bsPutUByte('B'); 530 bsPutUByte('Z'); 531 532 this.data = new Data(this.blockSize100k); 533 this.blockSorter = new BlockSort(this.data); 534 535 // huffmanised magic bytes 536 bsPutUByte('h'); 537 bsPutUByte('0' + this.blockSize100k); 538 539 this.combinedCRC = 0; 540 initBlock(); 541 } 542 543 private void initBlock() { 544 // blockNo++; 545 this.crc.initialiseCRC(); 546 this.last = -1; 547 // ch = 0; 548 549 boolean[] inUse = this.data.inUse; 550 for (int i = 256; --i >= 0;) { 551 inUse[i] = false; 552 } 553 554 } 555 556 private void endBlock() throws IOException { 557 this.blockCRC = this.crc.getFinalCRC(); 558 this.combinedCRC = (this.combinedCRC << 1) | (this.combinedCRC >>> 31); 559 this.combinedCRC ^= this.blockCRC; 560 561 // empty block at end of file 562 if (this.last == -1) { 563 return; 564 } 565 566 /* sort the block and establish posn of original string */ 567 blockSort(); 568 569 /* 570 * A 6-byte block header, the value chosen arbitrarily as 0x314159265359 571 * :-). A 32 bit value does not really give a strong enough guarantee 572 * that the value will not appear by chance in the compressed 573 * datastream. Worst-case probability of this event, for a 900k block, 574 * is about 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48 575 * bits. For a compressed file of size 100Gb -- about 100000 blocks -- 576 * only a 48-bit marker will do. NB: normal compression/ decompression 577 * donot rely on these statistical properties. They are only important 578 * when trying to recover blocks from damaged files. 579 */ 580 bsPutUByte(0x31); 581 bsPutUByte(0x41); 582 bsPutUByte(0x59); 583 bsPutUByte(0x26); 584 bsPutUByte(0x53); 585 bsPutUByte(0x59); 586 587 /* Now the block's CRC, so it is in a known place. */ 588 bsPutInt(this.blockCRC); 589 590 /* Now a single bit indicating no randomisation. */ 591 bsW(1, 0); 592 593 /* Finally, block's contents proper. */ 594 moveToFrontCodeAndSend(); 595 } 596 597 private void endCompression() throws IOException { 598 /* 599 * Now another magic 48-bit number, 0x177245385090, to indicate the end 600 * of the last block. (sqrt(pi), if you want to know. I did want to use 601 * e, but it contains too much repetition -- 27 18 28 18 28 46 -- for me 602 * to feel statistically comfortable. Call me paranoid.) 603 */ 604 bsPutUByte(0x17); 605 bsPutUByte(0x72); 606 bsPutUByte(0x45); 607 bsPutUByte(0x38); 608 bsPutUByte(0x50); 609 bsPutUByte(0x90); 610 611 bsPutInt(this.combinedCRC); 612 bsFinishedWithStream(); 613 } 614 615 /** 616 * Returns the blocksize parameter specified at construction time. 617 */ 618 public final int getBlockSize() { 619 return this.blockSize100k; 620 } 621 622 @Override 623 public void write(final byte[] buf, int offs, final int len) 624 throws IOException { 625 if (offs < 0) { 626 throw new IndexOutOfBoundsException("offs(" + offs + ") < 0."); 627 } 628 if (len < 0) { 629 throw new IndexOutOfBoundsException("len(" + len + ") < 0."); 630 } 631 if (offs + len > buf.length) { 632 throw new IndexOutOfBoundsException("offs(" + offs + ") + len(" 633 + len + ") > buf.length(" 634 + buf.length + ")."); 635 } 636 if (this.out == null) { 637 throw new IOException("stream closed"); 638 } 639 640 for (int hi = offs + len; offs < hi;) { 641 write0(buf[offs++]); 642 } 643 } 644 645 /** 646 * Keeps track of the last bytes written and implicitly performs 647 * run-length encoding as the first step of the bzip2 algorithm. 648 */ 649 private void write0(int b) throws IOException { 650 if (this.currentChar != -1) { 651 b &= 0xff; 652 if (this.currentChar == b) { 653 if (++this.runLength > 254) { 654 writeRun(); 655 this.currentChar = -1; 656 this.runLength = 0; 657 } 658 // else nothing to do 659 } else { 660 writeRun(); 661 this.runLength = 1; 662 this.currentChar = b; 663 } 664 } else { 665 this.currentChar = b & 0xff; 666 this.runLength++; 667 } 668 } 669 670 private static void hbAssignCodes(final int[] code, final byte[] length, 671 final int minLen, final int maxLen, 672 final int alphaSize) { 673 int vec = 0; 674 for (int n = minLen; n <= maxLen; n++) { 675 for (int i = 0; i < alphaSize; i++) { 676 if ((length[i] & 0xff) == n) { 677 code[i] = vec; 678 vec++; 679 } 680 } 681 vec <<= 1; 682 } 683 } 684 685 private void bsFinishedWithStream() throws IOException { 686 while (this.bsLive > 0) { 687 int ch = this.bsBuff >> 24; 688 this.out.write(ch); // write 8-bit 689 this.bsBuff <<= 8; 690 this.bsLive -= 8; 691 } 692 } 693 694 private void bsW(final int n, final int v) throws IOException { 695 final OutputStream outShadow = this.out; 696 int bsLiveShadow = this.bsLive; 697 int bsBuffShadow = this.bsBuff; 698 699 while (bsLiveShadow >= 8) { 700 outShadow.write(bsBuffShadow >> 24); // write 8-bit 701 bsBuffShadow <<= 8; 702 bsLiveShadow -= 8; 703 } 704 705 this.bsBuff = bsBuffShadow | (v << (32 - bsLiveShadow - n)); 706 this.bsLive = bsLiveShadow + n; 707 } 708 709 private void bsPutUByte(final int c) throws IOException { 710 bsW(8, c); 711 } 712 713 private void bsPutInt(final int u) throws IOException { 714 bsW(8, (u >> 24) & 0xff); 715 bsW(8, (u >> 16) & 0xff); 716 bsW(8, (u >> 8) & 0xff); 717 bsW(8, u & 0xff); 718 } 719 720 private void sendMTFValues() throws IOException { 721 final byte[][] len = this.data.sendMTFValues_len; 722 final int alphaSize = this.nInUse + 2; 723 724 for (int t = N_GROUPS; --t >= 0;) { 725 byte[] len_t = len[t]; 726 for (int v = alphaSize; --v >= 0;) { 727 len_t[v] = GREATER_ICOST; 728 } 729 } 730 731 /* Decide how many coding tables to use */ 732 // assert (this.nMTF > 0) : this.nMTF; 733 final int nGroups = (this.nMTF < 200) ? 2 : (this.nMTF < 600) ? 3 734 : (this.nMTF < 1200) ? 4 : (this.nMTF < 2400) ? 5 : 6; 735 736 /* Generate an initial set of coding tables */ 737 sendMTFValues0(nGroups, alphaSize); 738 739 /* 740 * Iterate up to N_ITERS times to improve the tables. 741 */ 742 final int nSelectors = sendMTFValues1(nGroups, alphaSize); 743 744 /* Compute MTF values for the selectors. */ 745 sendMTFValues2(nGroups, nSelectors); 746 747 /* Assign actual codes for the tables. */ 748 sendMTFValues3(nGroups, alphaSize); 749 750 /* Transmit the mapping table. */ 751 sendMTFValues4(); 752 753 /* Now the selectors. */ 754 sendMTFValues5(nGroups, nSelectors); 755 756 /* Now the coding tables. */ 757 sendMTFValues6(nGroups, alphaSize); 758 759 /* And finally, the block data proper */ 760 sendMTFValues7(); 761 } 762 763 private void sendMTFValues0(final int nGroups, final int alphaSize) { 764 final byte[][] len = this.data.sendMTFValues_len; 765 final int[] mtfFreq = this.data.mtfFreq; 766 767 int remF = this.nMTF; 768 int gs = 0; 769 770 for (int nPart = nGroups; nPart > 0; nPart--) { 771 final int tFreq = remF / nPart; 772 int ge = gs - 1; 773 int aFreq = 0; 774 775 for (final int a = alphaSize - 1; (aFreq < tFreq) && (ge < a);) { 776 aFreq += mtfFreq[++ge]; 777 } 778 779 if ((ge > gs) && (nPart != nGroups) && (nPart != 1) 780 && (((nGroups - nPart) & 1) != 0)) { 781 aFreq -= mtfFreq[ge--]; 782 } 783 784 final byte[] len_np = len[nPart - 1]; 785 for (int v = alphaSize; --v >= 0;) { 786 if ((v >= gs) && (v <= ge)) { 787 len_np[v] = LESSER_ICOST; 788 } else { 789 len_np[v] = GREATER_ICOST; 790 } 791 } 792 793 gs = ge + 1; 794 remF -= aFreq; 795 } 796 } 797 798 private int sendMTFValues1(final int nGroups, final int alphaSize) { 799 final Data dataShadow = this.data; 800 final int[][] rfreq = dataShadow.sendMTFValues_rfreq; 801 final int[] fave = dataShadow.sendMTFValues_fave; 802 final short[] cost = dataShadow.sendMTFValues_cost; 803 final char[] sfmap = dataShadow.sfmap; 804 final byte[] selector = dataShadow.selector; 805 final byte[][] len = dataShadow.sendMTFValues_len; 806 final byte[] len_0 = len[0]; 807 final byte[] len_1 = len[1]; 808 final byte[] len_2 = len[2]; 809 final byte[] len_3 = len[3]; 810 final byte[] len_4 = len[4]; 811 final byte[] len_5 = len[5]; 812 final int nMTFShadow = this.nMTF; 813 814 int nSelectors = 0; 815 816 for (int iter = 0; iter < N_ITERS; iter++) { 817 for (int t = nGroups; --t >= 0;) { 818 fave[t] = 0; 819 int[] rfreqt = rfreq[t]; 820 for (int i = alphaSize; --i >= 0;) { 821 rfreqt[i] = 0; 822 } 823 } 824 825 nSelectors = 0; 826 827 for (int gs = 0; gs < this.nMTF;) { 828 /* Set group start & end marks. */ 829 830 /* 831 * Calculate the cost of this group as coded by each of the 832 * coding tables. 833 */ 834 835 final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1); 836 837 if (nGroups == N_GROUPS) { 838 // unrolled version of the else-block 839 840 short cost0 = 0; 841 short cost1 = 0; 842 short cost2 = 0; 843 short cost3 = 0; 844 short cost4 = 0; 845 short cost5 = 0; 846 847 for (int i = gs; i <= ge; i++) { 848 final int icv = sfmap[i]; 849 cost0 += len_0[icv] & 0xff; 850 cost1 += len_1[icv] & 0xff; 851 cost2 += len_2[icv] & 0xff; 852 cost3 += len_3[icv] & 0xff; 853 cost4 += len_4[icv] & 0xff; 854 cost5 += len_5[icv] & 0xff; 855 } 856 857 cost[0] = cost0; 858 cost[1] = cost1; 859 cost[2] = cost2; 860 cost[3] = cost3; 861 cost[4] = cost4; 862 cost[5] = cost5; 863 864 } else { 865 for (int t = nGroups; --t >= 0;) { 866 cost[t] = 0; 867 } 868 869 for (int i = gs; i <= ge; i++) { 870 final int icv = sfmap[i]; 871 for (int t = nGroups; --t >= 0;) { 872 cost[t] += len[t][icv] & 0xff; 873 } 874 } 875 } 876 877 /* 878 * Find the coding table which is best for this group, and 879 * record its identity in the selector table. 880 */ 881 int bt = -1; 882 for (int t = nGroups, bc = 999999999; --t >= 0;) { 883 final int cost_t = cost[t]; 884 if (cost_t < bc) { 885 bc = cost_t; 886 bt = t; 887 } 888 } 889 890 fave[bt]++; 891 selector[nSelectors] = (byte) bt; 892 nSelectors++; 893 894 /* 895 * Increment the symbol frequencies for the selected table. 896 */ 897 final int[] rfreq_bt = rfreq[bt]; 898 for (int i = gs; i <= ge; i++) { 899 rfreq_bt[sfmap[i]]++; 900 } 901 902 gs = ge + 1; 903 } 904 905 /* 906 * Recompute the tables based on the accumulated frequencies. 907 */ 908 for (int t = 0; t < nGroups; t++) { 909 hbMakeCodeLengths(len[t], rfreq[t], this.data, alphaSize, 20); 910 } 911 } 912 913 return nSelectors; 914 } 915 916 private void sendMTFValues2(final int nGroups, final int nSelectors) { 917 // assert (nGroups < 8) : nGroups; 918 919 final Data dataShadow = this.data; 920 byte[] pos = dataShadow.sendMTFValues2_pos; 921 922 for (int i = nGroups; --i >= 0;) { 923 pos[i] = (byte) i; 924 } 925 926 for (int i = 0; i < nSelectors; i++) { 927 final byte ll_i = dataShadow.selector[i]; 928 byte tmp = pos[0]; 929 int j = 0; 930 931 while (ll_i != tmp) { 932 j++; 933 byte tmp2 = tmp; 934 tmp = pos[j]; 935 pos[j] = tmp2; 936 } 937 938 pos[0] = tmp; 939 dataShadow.selectorMtf[i] = (byte) j; 940 } 941 } 942 943 private void sendMTFValues3(final int nGroups, final int alphaSize) { 944 int[][] code = this.data.sendMTFValues_code; 945 byte[][] len = this.data.sendMTFValues_len; 946 947 for (int t = 0; t < nGroups; t++) { 948 int minLen = 32; 949 int maxLen = 0; 950 final byte[] len_t = len[t]; 951 for (int i = alphaSize; --i >= 0;) { 952 final int l = len_t[i] & 0xff; 953 if (l > maxLen) { 954 maxLen = l; 955 } 956 if (l < minLen) { 957 minLen = l; 958 } 959 } 960 961 // assert (maxLen <= 20) : maxLen; 962 // assert (minLen >= 1) : minLen; 963 964 hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize); 965 } 966 } 967 968 private void sendMTFValues4() throws IOException { 969 final boolean[] inUse = this.data.inUse; 970 final boolean[] inUse16 = this.data.sentMTFValues4_inUse16; 971 972 for (int i = 16; --i >= 0;) { 973 inUse16[i] = false; 974 final int i16 = i * 16; 975 for (int j = 16; --j >= 0;) { 976 if (inUse[i16 + j]) { 977 inUse16[i] = true; 978 } 979 } 980 } 981 982 for (int i = 0; i < 16; i++) { 983 bsW(1, inUse16[i] ? 1 : 0); 984 } 985 986 final OutputStream outShadow = this.out; 987 int bsLiveShadow = this.bsLive; 988 int bsBuffShadow = this.bsBuff; 989 990 for (int i = 0; i < 16; i++) { 991 if (inUse16[i]) { 992 final int i16 = i * 16; 993 for (int j = 0; j < 16; j++) { 994 // inlined: bsW(1, inUse[i16 + j] ? 1 : 0); 995 while (bsLiveShadow >= 8) { 996 outShadow.write(bsBuffShadow >> 24); // write 8-bit 997 bsBuffShadow <<= 8; 998 bsLiveShadow -= 8; 999 } 1000 if (inUse[i16 + j]) { 1001 bsBuffShadow |= 1 << (32 - bsLiveShadow - 1); 1002 } 1003 bsLiveShadow++; 1004 } 1005 } 1006 } 1007 1008 this.bsBuff = bsBuffShadow; 1009 this.bsLive = bsLiveShadow; 1010 } 1011 1012 private void sendMTFValues5(final int nGroups, final int nSelectors) 1013 throws IOException { 1014 bsW(3, nGroups); 1015 bsW(15, nSelectors); 1016 1017 final OutputStream outShadow = this.out; 1018 final byte[] selectorMtf = this.data.selectorMtf; 1019 1020 int bsLiveShadow = this.bsLive; 1021 int bsBuffShadow = this.bsBuff; 1022 1023 for (int i = 0; i < nSelectors; i++) { 1024 for (int j = 0, hj = selectorMtf[i] & 0xff; j < hj; j++) { 1025 // inlined: bsW(1, 1); 1026 while (bsLiveShadow >= 8) { 1027 outShadow.write(bsBuffShadow >> 24); 1028 bsBuffShadow <<= 8; 1029 bsLiveShadow -= 8; 1030 } 1031 bsBuffShadow |= 1 << (32 - bsLiveShadow - 1); 1032 bsLiveShadow++; 1033 } 1034 1035 // inlined: bsW(1, 0); 1036 while (bsLiveShadow >= 8) { 1037 outShadow.write(bsBuffShadow >> 24); 1038 bsBuffShadow <<= 8; 1039 bsLiveShadow -= 8; 1040 } 1041 // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1); 1042 bsLiveShadow++; 1043 } 1044 1045 this.bsBuff = bsBuffShadow; 1046 this.bsLive = bsLiveShadow; 1047 } 1048 1049 private void sendMTFValues6(final int nGroups, final int alphaSize) 1050 throws IOException { 1051 final byte[][] len = this.data.sendMTFValues_len; 1052 final OutputStream outShadow = this.out; 1053 1054 int bsLiveShadow = this.bsLive; 1055 int bsBuffShadow = this.bsBuff; 1056 1057 for (int t = 0; t < nGroups; t++) { 1058 byte[] len_t = len[t]; 1059 int curr = len_t[0] & 0xff; 1060 1061 // inlined: bsW(5, curr); 1062 while (bsLiveShadow >= 8) { 1063 outShadow.write(bsBuffShadow >> 24); // write 8-bit 1064 bsBuffShadow <<= 8; 1065 bsLiveShadow -= 8; 1066 } 1067 bsBuffShadow |= curr << (32 - bsLiveShadow - 5); 1068 bsLiveShadow += 5; 1069 1070 for (int i = 0; i < alphaSize; i++) { 1071 int lti = len_t[i] & 0xff; 1072 while (curr < lti) { 1073 // inlined: bsW(2, 2); 1074 while (bsLiveShadow >= 8) { 1075 outShadow.write(bsBuffShadow >> 24); // write 8-bit 1076 bsBuffShadow <<= 8; 1077 bsLiveShadow -= 8; 1078 } 1079 bsBuffShadow |= 2 << (32 - bsLiveShadow - 2); 1080 bsLiveShadow += 2; 1081 1082 curr++; /* 10 */ 1083 } 1084 1085 while (curr > lti) { 1086 // inlined: bsW(2, 3); 1087 while (bsLiveShadow >= 8) { 1088 outShadow.write(bsBuffShadow >> 24); // write 8-bit 1089 bsBuffShadow <<= 8; 1090 bsLiveShadow -= 8; 1091 } 1092 bsBuffShadow |= 3 << (32 - bsLiveShadow - 2); 1093 bsLiveShadow += 2; 1094 1095 curr--; /* 11 */ 1096 } 1097 1098 // inlined: bsW(1, 0); 1099 while (bsLiveShadow >= 8) { 1100 outShadow.write(bsBuffShadow >> 24); // write 8-bit 1101 bsBuffShadow <<= 8; 1102 bsLiveShadow -= 8; 1103 } 1104 // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1); 1105 bsLiveShadow++; 1106 } 1107 } 1108 1109 this.bsBuff = bsBuffShadow; 1110 this.bsLive = bsLiveShadow; 1111 } 1112 1113 private void sendMTFValues7() throws IOException { 1114 final Data dataShadow = this.data; 1115 final byte[][] len = dataShadow.sendMTFValues_len; 1116 final int[][] code = dataShadow.sendMTFValues_code; 1117 final OutputStream outShadow = this.out; 1118 final byte[] selector = dataShadow.selector; 1119 final char[] sfmap = dataShadow.sfmap; 1120 final int nMTFShadow = this.nMTF; 1121 1122 int selCtr = 0; 1123 1124 int bsLiveShadow = this.bsLive; 1125 int bsBuffShadow = this.bsBuff; 1126 1127 for (int gs = 0; gs < nMTFShadow;) { 1128 final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1); 1129 final int selector_selCtr = selector[selCtr] & 0xff; 1130 final int[] code_selCtr = code[selector_selCtr]; 1131 final byte[] len_selCtr = len[selector_selCtr]; 1132 1133 while (gs <= ge) { 1134 final int sfmap_i = sfmap[gs]; 1135 1136 // 1137 // inlined: bsW(len_selCtr[sfmap_i] & 0xff, 1138 // code_selCtr[sfmap_i]); 1139 // 1140 while (bsLiveShadow >= 8) { 1141 outShadow.write(bsBuffShadow >> 24); 1142 bsBuffShadow <<= 8; 1143 bsLiveShadow -= 8; 1144 } 1145 final int n = len_selCtr[sfmap_i] & 0xFF; 1146 bsBuffShadow |= code_selCtr[sfmap_i] << (32 - bsLiveShadow - n); 1147 bsLiveShadow += n; 1148 1149 gs++; 1150 } 1151 1152 gs = ge + 1; 1153 selCtr++; 1154 } 1155 1156 this.bsBuff = bsBuffShadow; 1157 this.bsLive = bsLiveShadow; 1158 } 1159 1160 private void moveToFrontCodeAndSend() throws IOException { 1161 bsW(24, this.data.origPtr); 1162 generateMTFValues(); 1163 sendMTFValues(); 1164 } 1165 1166 private void blockSort() { 1167 blockSorter.blockSort(data, last); 1168 } 1169 1170 /* 1171 * Performs Move-To-Front on the Burrows-Wheeler transformed 1172 * buffer, storing the MTFed data in data.sfmap in RUNA/RUNB 1173 * run-length-encoded form. 1174 * 1175 * <p>Keeps track of byte frequencies in data.mtfFreq at the same time.</p> 1176 */ 1177 private void generateMTFValues() { 1178 final int lastShadow = this.last; 1179 final Data dataShadow = this.data; 1180 final boolean[] inUse = dataShadow.inUse; 1181 final byte[] block = dataShadow.block; 1182 final int[] fmap = dataShadow.fmap; 1183 final char[] sfmap = dataShadow.sfmap; 1184 final int[] mtfFreq = dataShadow.mtfFreq; 1185 final byte[] unseqToSeq = dataShadow.unseqToSeq; 1186 final byte[] yy = dataShadow.generateMTFValues_yy; 1187 1188 // make maps 1189 int nInUseShadow = 0; 1190 for (int i = 0; i < 256; i++) { 1191 if (inUse[i]) { 1192 unseqToSeq[i] = (byte) nInUseShadow; 1193 nInUseShadow++; 1194 } 1195 } 1196 this.nInUse = nInUseShadow; 1197 1198 final int eob = nInUseShadow + 1; 1199 1200 for (int i = eob; i >= 0; i--) { 1201 mtfFreq[i] = 0; 1202 } 1203 1204 for (int i = nInUseShadow; --i >= 0;) { 1205 yy[i] = (byte) i; 1206 } 1207 1208 int wr = 0; 1209 int zPend = 0; 1210 1211 for (int i = 0; i <= lastShadow; i++) { 1212 final byte ll_i = unseqToSeq[block[fmap[i]] & 0xff]; 1213 byte tmp = yy[0]; 1214 int j = 0; 1215 1216 while (ll_i != tmp) { 1217 j++; 1218 byte tmp2 = tmp; 1219 tmp = yy[j]; 1220 yy[j] = tmp2; 1221 } 1222 yy[0] = tmp; 1223 1224 if (j == 0) { 1225 zPend++; 1226 } else { 1227 if (zPend > 0) { 1228 zPend--; 1229 while (true) { 1230 if ((zPend & 1) == 0) { 1231 sfmap[wr] = RUNA; 1232 wr++; 1233 mtfFreq[RUNA]++; 1234 } else { 1235 sfmap[wr] = RUNB; 1236 wr++; 1237 mtfFreq[RUNB]++; 1238 } 1239 1240 if (zPend >= 2) { 1241 zPend = (zPend - 2) >> 1; 1242 } else { 1243 break; 1244 } 1245 } 1246 zPend = 0; 1247 } 1248 sfmap[wr] = (char) (j + 1); 1249 wr++; 1250 mtfFreq[j + 1]++; 1251 } 1252 } 1253 1254 if (zPend > 0) { 1255 zPend--; 1256 while (true) { 1257 if ((zPend & 1) == 0) { 1258 sfmap[wr] = RUNA; 1259 wr++; 1260 mtfFreq[RUNA]++; 1261 } else { 1262 sfmap[wr] = RUNB; 1263 wr++; 1264 mtfFreq[RUNB]++; 1265 } 1266 1267 if (zPend >= 2) { 1268 zPend = (zPend - 2) >> 1; 1269 } else { 1270 break; 1271 } 1272 } 1273 } 1274 1275 sfmap[wr] = (char) eob; 1276 mtfFreq[eob]++; 1277 this.nMTF = wr + 1; 1278 } 1279 1280 static final class Data extends Object { 1281 1282 // with blockSize 900k 1283 /* maps unsigned byte => "does it occur in block" */ 1284 final boolean[] inUse = new boolean[256]; // 256 byte 1285 final byte[] unseqToSeq = new byte[256]; // 256 byte 1286 final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte 1287 final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte 1288 final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte 1289 1290 final byte[] generateMTFValues_yy = new byte[256]; // 256 byte 1291 final byte[][] sendMTFValues_len = new byte[N_GROUPS][MAX_ALPHA_SIZE]; // 1548 1292 // byte 1293 final int[][] sendMTFValues_rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 1294 // byte 1295 final int[] sendMTFValues_fave = new int[N_GROUPS]; // 24 byte 1296 final short[] sendMTFValues_cost = new short[N_GROUPS]; // 12 byte 1297 final int[][] sendMTFValues_code = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 1298 // byte 1299 final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte 1300 final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte 1301 1302 final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte 1303 final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte 1304 final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte 1305 1306 // ------------ 1307 // 333408 byte 1308 1309 /* holds the RLEd block of original data starting at index 1. 1310 * After sorting the last byte added to the buffer is at index 1311 * 0. */ 1312 final byte[] block; // 900021 byte 1313 /* maps index in Burrows-Wheeler transformed block => index of 1314 * byte in original block */ 1315 final int[] fmap; // 3600000 byte 1316 final char[] sfmap; // 3600000 byte 1317 // ------------ 1318 // 8433529 byte 1319 // ============ 1320 1321 /** 1322 * Index of original line in Burrows-Wheeler table. 1323 * 1324 * <p>This is the index in fmap that points to the last byte 1325 * of the original data.</p> 1326 */ 1327 int origPtr; 1328 1329 Data(int blockSize100k) { 1330 super(); 1331 1332 final int n = blockSize100k * BZip2Constants.BASEBLOCKSIZE; 1333 this.block = new byte[(n + 1 + NUM_OVERSHOOT_BYTES)]; 1334 this.fmap = new int[n]; 1335 this.sfmap = new char[2 * n]; 1336 } 1337 1338 } 1339 1340 }