001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     * http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing,
013     * software distributed under the License is distributed on an
014     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015     * KIND, either express or implied.  See the License for the
016     * specific language governing permissions and limitations
017     * under the License.
018     */
019    package org.apache.commons.compress.compressors.bzip2;
020    
021    import java.io.IOException;
022    import java.io.OutputStream;
023    
024    import org.apache.commons.compress.compressors.CompressorOutputStream;
025    
026    /**
027     * An output stream that compresses into the BZip2 format into another stream.
028     *
029     * <p>
030     * The compression requires large amounts of memory. Thus you should call the
031     * {@link #close() close()} method as soon as possible, to force
032     * <tt>BZip2CompressorOutputStream</tt> to release the allocated memory.
033     * </p>
034     *
035     * <p> You can shrink the amount of allocated memory and maybe raise
036     * the compression speed by choosing a lower blocksize, which in turn
037     * may cause a lower compression ratio. You can avoid unnecessary
038     * memory allocation by avoiding using a blocksize which is bigger
039     * than the size of the input.  </p>
040     *
041     * <p> You can compute the memory usage for compressing by the
042     * following formula: </p>
043     *
044     * <pre>
045     * &lt;code&gt;400k + (9 * blocksize)&lt;/code&gt;.
046     * </pre>
047     *
048     * <p> To get the memory required for decompression by {@link
049     * BZip2CompressorInputStream} use </p>
050     *
051     * <pre>
052     * &lt;code&gt;65k + (5 * blocksize)&lt;/code&gt;.
053     * </pre>
054     *
055     * <table width="100%" border="1">
056     * <colgroup> <col width="33%" /> <col width="33%" /> <col width="33%" />
057     * </colgroup>
058     * <tr>
059     * <th colspan="3">Memory usage by blocksize</th>
060     * </tr>
061     * <tr>
062     * <th align="right">Blocksize</th> <th align="right">Compression<br>
063     * memory usage</th> <th align="right">Decompression<br>
064     * memory usage</th>
065     * </tr>
066     * <tr>
067     * <td align="right">100k</td>
068     * <td align="right">1300k</td>
069     * <td align="right">565k</td>
070     * </tr>
071     * <tr>
072     * <td align="right">200k</td>
073     * <td align="right">2200k</td>
074     * <td align="right">1065k</td>
075     * </tr>
076     * <tr>
077     * <td align="right">300k</td>
078     * <td align="right">3100k</td>
079     * <td align="right">1565k</td>
080     * </tr>
081     * <tr>
082     * <td align="right">400k</td>
083     * <td align="right">4000k</td>
084     * <td align="right">2065k</td>
085     * </tr>
086     * <tr>
087     * <td align="right">500k</td>
088     * <td align="right">4900k</td>
089     * <td align="right">2565k</td>
090     * </tr>
091     * <tr>
092     * <td align="right">600k</td>
093     * <td align="right">5800k</td>
094     * <td align="right">3065k</td>
095     * </tr>
096     * <tr>
097     * <td align="right">700k</td>
098     * <td align="right">6700k</td>
099     * <td align="right">3565k</td>
100     * </tr>
101     * <tr>
102     * <td align="right">800k</td>
103     * <td align="right">7600k</td>
104     * <td align="right">4065k</td>
105     * </tr>
106     * <tr>
107     * <td align="right">900k</td>
108     * <td align="right">8500k</td>
109     * <td align="right">4565k</td>
110     * </tr>
111     * </table>
112     *
113     * <p>
114     * For decompression <tt>BZip2CompressorInputStream</tt> allocates less memory if the
115     * bzipped input is smaller than one block.
116     * </p>
117     *
118     * <p>
119     * Instances of this class are not threadsafe.
120     * </p>
121     *
122     * <p>
123     * TODO: Update to BZip2 1.0.1
124     * </p>
125     * @NotThreadSafe
126     */
127    public class BZip2CompressorOutputStream extends CompressorOutputStream
128        implements BZip2Constants {
129    
130        /**
131         * The minimum supported blocksize <tt> == 1</tt>.
132         */
133        public static final int MIN_BLOCKSIZE = 1;
134    
135        /**
136         * The maximum supported blocksize <tt> == 9</tt>.
137         */
138        public static final int MAX_BLOCKSIZE = 9;
139    
140        private static final int GREATER_ICOST = 15;
141        private static final int LESSER_ICOST = 0;
142    
143        private static void hbMakeCodeLengths(final byte[] len, final int[] freq,
144                                              final Data dat, final int alphaSize,
145                                              final int maxLen) {
146            /*
147             * Nodes and heap entries run from 1. Entry 0 for both the heap and
148             * nodes is a sentinel.
149             */
150            final int[] heap = dat.heap;
151            final int[] weight = dat.weight;
152            final int[] parent = dat.parent;
153    
154            for (int i = alphaSize; --i >= 0;) {
155                weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
156            }
157    
158            for (boolean tooLong = true; tooLong;) {
159                tooLong = false;
160    
161                int nNodes = alphaSize;
162                int nHeap = 0;
163                heap[0] = 0;
164                weight[0] = 0;
165                parent[0] = -2;
166    
167                for (int i = 1; i <= alphaSize; i++) {
168                    parent[i] = -1;
169                    nHeap++;
170                    heap[nHeap] = i;
171    
172                    int zz = nHeap;
173                    int tmp = heap[zz];
174                    while (weight[tmp] < weight[heap[zz >> 1]]) {
175                        heap[zz] = heap[zz >> 1];
176                        zz >>= 1;
177                    }
178                    heap[zz] = tmp;
179                }
180    
181                while (nHeap > 1) {
182                    int n1 = heap[1];
183                    heap[1] = heap[nHeap];
184                    nHeap--;
185    
186                    int yy = 0;
187                    int zz = 1;
188                    int tmp = heap[1];
189    
190                    while (true) {
191                        yy = zz << 1;
192    
193                        if (yy > nHeap) {
194                            break;
195                        }
196    
197                        if ((yy < nHeap)
198                            && (weight[heap[yy + 1]] < weight[heap[yy]])) {
199                            yy++;
200                        }
201    
202                        if (weight[tmp] < weight[heap[yy]]) {
203                            break;
204                        }
205    
206                        heap[zz] = heap[yy];
207                        zz = yy;
208                    }
209    
210                    heap[zz] = tmp;
211    
212                    int n2 = heap[1];
213                    heap[1] = heap[nHeap];
214                    nHeap--;
215    
216                    yy = 0;
217                    zz = 1;
218                    tmp = heap[1];
219    
220                    while (true) {
221                        yy = zz << 1;
222    
223                        if (yy > nHeap) {
224                            break;
225                        }
226    
227                        if ((yy < nHeap)
228                            && (weight[heap[yy + 1]] < weight[heap[yy]])) {
229                            yy++;
230                        }
231    
232                        if (weight[tmp] < weight[heap[yy]]) {
233                            break;
234                        }
235    
236                        heap[zz] = heap[yy];
237                        zz = yy;
238                    }
239    
240                    heap[zz] = tmp;
241                    nNodes++;
242                    parent[n1] = parent[n2] = nNodes;
243    
244                    final int weight_n1 = weight[n1];
245                    final int weight_n2 = weight[n2];
246                    weight[nNodes] = ((weight_n1 & 0xffffff00)
247                                      + (weight_n2 & 0xffffff00))
248                        | (1 + (((weight_n1 & 0x000000ff)
249                                 > (weight_n2 & 0x000000ff))
250                                ? (weight_n1 & 0x000000ff)
251                                : (weight_n2 & 0x000000ff)));
252    
253                    parent[nNodes] = -1;
254                    nHeap++;
255                    heap[nHeap] = nNodes;
256    
257                    tmp = 0;
258                    zz = nHeap;
259                    tmp = heap[zz];
260                    final int weight_tmp = weight[tmp];
261                    while (weight_tmp < weight[heap[zz >> 1]]) {
262                        heap[zz] = heap[zz >> 1];
263                        zz >>= 1;
264                    }
265                    heap[zz] = tmp;
266    
267                }
268    
269                for (int i = 1; i <= alphaSize; i++) {
270                    int j = 0;
271                    int k = i;
272    
273                    for (int parent_k; (parent_k = parent[k]) >= 0;) {
274                        k = parent_k;
275                        j++;
276                    }
277    
278                    len[i - 1] = (byte) j;
279                    if (j > maxLen) {
280                        tooLong = true;
281                    }
282                }
283    
284                if (tooLong) {
285                    for (int i = 1; i < alphaSize; i++) {
286                        int j = weight[i] >> 8;
287                        j = 1 + (j >> 1);
288                        weight[i] = j << 8;
289                    }
290                }
291            }
292        }
293    
294        /**
295         * Index of the last char in the block, so the block size == last + 1.
296         */
297        private int last;
298    
299        /**
300         * Always: in the range 0 .. 9. The current block size is 100000 * this
301         * number.
302         */
303        private final int blockSize100k;
304    
305        private int bsBuff;
306        private int bsLive;
307        private final CRC crc = new CRC();
308    
309        private int nInUse;
310    
311        private int nMTF;
312    
313        private int currentChar = -1;
314        private int runLength = 0;
315    
316        private int blockCRC;
317        private int combinedCRC;
318        private final int allowableBlockSize;
319    
320        /**
321         * All memory intensive stuff.
322         */
323        private Data data;
324        private BlockSort blockSorter;
325    
326        private OutputStream out;
327    
328        /**
329         * Chooses a blocksize based on the given length of the data to compress.
330         *
331         * @return The blocksize, between {@link #MIN_BLOCKSIZE} and
332         *         {@link #MAX_BLOCKSIZE} both inclusive. For a negative
333         *         <tt>inputLength</tt> this method returns <tt>MAX_BLOCKSIZE</tt>
334         *         always.
335         *
336         * @param inputLength
337         *            The length of the data which will be compressed by
338         *            <tt>BZip2CompressorOutputStream</tt>.
339         */
340        public static int chooseBlockSize(long inputLength) {
341            return (inputLength > 0) ? (int) Math
342                .min((inputLength / 132000) + 1, 9) : MAX_BLOCKSIZE;
343        }
344    
345        /**
346         * Constructs a new <tt>BZip2CompressorOutputStream</tt> with a blocksize of 900k.
347         *
348         * @param out 
349         *            the destination stream.
350         *
351         * @throws IOException
352         *             if an I/O error occurs in the specified stream.
353         * @throws NullPointerException
354         *             if <code>out == null</code>.
355         */
356        public BZip2CompressorOutputStream(final OutputStream out)
357            throws IOException {
358            this(out, MAX_BLOCKSIZE);
359        }
360    
361        /**
362         * Constructs a new <tt>BZip2CompressorOutputStream</tt> with specified blocksize.
363         *
364         * @param out
365         *            the destination stream.
366         * @param blockSize
367         *            the blockSize as 100k units.
368         *
369         * @throws IOException
370         *             if an I/O error occurs in the specified stream.
371         * @throws IllegalArgumentException
372         *             if <code>(blockSize < 1) || (blockSize > 9)</code>.
373         * @throws NullPointerException
374         *             if <code>out == null</code>.
375         *
376         * @see #MIN_BLOCKSIZE
377         * @see #MAX_BLOCKSIZE
378         */
379        public BZip2CompressorOutputStream(final OutputStream out,
380                                           final int blockSize)
381            throws IOException {
382            super();
383    
384            if (blockSize < 1) {
385                throw new IllegalArgumentException("blockSize(" + blockSize
386                                                   + ") < 1");
387            }
388            if (blockSize > 9) {
389                throw new IllegalArgumentException("blockSize(" + blockSize
390                                                   + ") > 9");
391            }
392    
393            this.blockSize100k = blockSize;
394            this.out = out;
395    
396            /* 20 is just a paranoia constant */
397            this.allowableBlockSize = (this.blockSize100k * BZip2Constants.BASEBLOCKSIZE) - 20;
398            init();
399        }
400    
401        /** {@inheritDoc} */
402        @Override
403        public void write(final int b) throws IOException {
404            if (this.out != null) {
405                write0(b);
406            } else {
407                throw new IOException("closed");
408            }
409        }
410    
411        /**
412         * Writes the current byte to the buffer, run-length encoding it
413         * if it has been repeated at least four times (the first step
414         * RLEs sequences of four identical bytes).
415         *
416         * <p>Flushes the current block before writing data if it is
417         * full.</p>
418         *
419         * <p>"write to the buffer" means adding to data.buffer starting
420         * two steps "after" this.last - initially starting at index 1
421         * (not 0) - and updating this.last to point to the last index
422         * written minus 1.</p>
423         */
424        private void writeRun() throws IOException {
425            final int lastShadow = this.last;
426    
427            if (lastShadow < this.allowableBlockSize) {
428                final int currentCharShadow = this.currentChar;
429                final Data dataShadow = this.data;
430                dataShadow.inUse[currentCharShadow] = true;
431                final byte ch = (byte) currentCharShadow;
432    
433                int runLengthShadow = this.runLength;
434                this.crc.updateCRC(currentCharShadow, runLengthShadow);
435    
436                switch (runLengthShadow) {
437                case 1:
438                    dataShadow.block[lastShadow + 2] = ch;
439                    this.last = lastShadow + 1;
440                    break;
441    
442                case 2:
443                    dataShadow.block[lastShadow + 2] = ch;
444                    dataShadow.block[lastShadow + 3] = ch;
445                    this.last = lastShadow + 2;
446                    break;
447    
448                case 3: {
449                    final byte[] block = dataShadow.block;
450                    block[lastShadow + 2] = ch;
451                    block[lastShadow + 3] = ch;
452                    block[lastShadow + 4] = ch;
453                    this.last = lastShadow + 3;
454                }
455                    break;
456    
457                default: {
458                    runLengthShadow -= 4;
459                    dataShadow.inUse[runLengthShadow] = true;
460                    final byte[] block = dataShadow.block;
461                    block[lastShadow + 2] = ch;
462                    block[lastShadow + 3] = ch;
463                    block[lastShadow + 4] = ch;
464                    block[lastShadow + 5] = ch;
465                    block[lastShadow + 6] = (byte) runLengthShadow;
466                    this.last = lastShadow + 5;
467                }
468                    break;
469    
470                }
471            } else {
472                endBlock();
473                initBlock();
474                writeRun();
475            }
476        }
477    
478        /**
479         * Overriden to close the stream.
480         */
481        @Override
482        protected void finalize() throws Throwable {
483            finish();
484            super.finalize();
485        }
486    
487    
488        public void finish() throws IOException {
489            if (out != null) {
490                try {
491                    if (this.runLength > 0) {
492                        writeRun();
493                    }
494                    this.currentChar = -1;
495                    endBlock();
496                    endCompression();
497                } finally {
498                    this.out = null;
499                    this.data = null;
500                    this.blockSorter = null;
501                }
502            }
503        }
504    
505        @Override
506        public void close() throws IOException {
507            if (out != null) {
508                OutputStream outShadow = this.out;
509                finish();
510                outShadow.close();
511            }
512        }
513    
514        @Override
515        public void flush() throws IOException {
516            OutputStream outShadow = this.out;
517            if (outShadow != null) {
518                outShadow.flush();
519            }
520        }
521    
522        /**
523         * Writes magic bytes like BZ on the first position of the stream
524         * and bytes indiciating the file-format, which is 
525         * huffmanised, followed by a digit indicating blockSize100k.
526         * @throws IOException if the magic bytes could not been written
527         */
528        private void init() throws IOException {
529            bsPutUByte('B');
530            bsPutUByte('Z');
531    
532            this.data = new Data(this.blockSize100k);
533            this.blockSorter = new BlockSort(this.data);
534    
535            // huffmanised magic bytes
536            bsPutUByte('h');
537            bsPutUByte('0' + this.blockSize100k);
538    
539            this.combinedCRC = 0;
540            initBlock();
541        }
542    
543        private void initBlock() {
544            // blockNo++;
545            this.crc.initialiseCRC();
546            this.last = -1;
547            // ch = 0;
548    
549            boolean[] inUse = this.data.inUse;
550            for (int i = 256; --i >= 0;) {
551                inUse[i] = false;
552            }
553    
554        }
555    
556        private void endBlock() throws IOException {
557            this.blockCRC = this.crc.getFinalCRC();
558            this.combinedCRC = (this.combinedCRC << 1) | (this.combinedCRC >>> 31);
559            this.combinedCRC ^= this.blockCRC;
560    
561            // empty block at end of file
562            if (this.last == -1) {
563                return;
564            }
565    
566            /* sort the block and establish posn of original string */
567            blockSort();
568    
569            /*
570             * A 6-byte block header, the value chosen arbitrarily as 0x314159265359
571             * :-). A 32 bit value does not really give a strong enough guarantee
572             * that the value will not appear by chance in the compressed
573             * datastream. Worst-case probability of this event, for a 900k block,
574             * is about 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48
575             * bits. For a compressed file of size 100Gb -- about 100000 blocks --
576             * only a 48-bit marker will do. NB: normal compression/ decompression
577             * donot rely on these statistical properties. They are only important
578             * when trying to recover blocks from damaged files.
579             */
580            bsPutUByte(0x31);
581            bsPutUByte(0x41);
582            bsPutUByte(0x59);
583            bsPutUByte(0x26);
584            bsPutUByte(0x53);
585            bsPutUByte(0x59);
586    
587            /* Now the block's CRC, so it is in a known place. */
588            bsPutInt(this.blockCRC);
589    
590            /* Now a single bit indicating no randomisation. */
591            bsW(1, 0);
592    
593            /* Finally, block's contents proper. */
594            moveToFrontCodeAndSend();
595        }
596    
597        private void endCompression() throws IOException {
598            /*
599             * Now another magic 48-bit number, 0x177245385090, to indicate the end
600             * of the last block. (sqrt(pi), if you want to know. I did want to use
601             * e, but it contains too much repetition -- 27 18 28 18 28 46 -- for me
602             * to feel statistically comfortable. Call me paranoid.)
603             */
604            bsPutUByte(0x17);
605            bsPutUByte(0x72);
606            bsPutUByte(0x45);
607            bsPutUByte(0x38);
608            bsPutUByte(0x50);
609            bsPutUByte(0x90);
610    
611            bsPutInt(this.combinedCRC);
612            bsFinishedWithStream();
613        }
614    
615        /**
616         * Returns the blocksize parameter specified at construction time.
617         */
618        public final int getBlockSize() {
619            return this.blockSize100k;
620        }
621    
622        @Override
623        public void write(final byte[] buf, int offs, final int len)
624            throws IOException {
625            if (offs < 0) {
626                throw new IndexOutOfBoundsException("offs(" + offs + ") < 0.");
627            }
628            if (len < 0) {
629                throw new IndexOutOfBoundsException("len(" + len + ") < 0.");
630            }
631            if (offs + len > buf.length) {
632                throw new IndexOutOfBoundsException("offs(" + offs + ") + len("
633                                                    + len + ") > buf.length("
634                                                    + buf.length + ").");
635            }
636            if (this.out == null) {
637                throw new IOException("stream closed");
638            }
639    
640            for (int hi = offs + len; offs < hi;) {
641                write0(buf[offs++]);
642            }
643        }
644    
645        /**
646         * Keeps track of the last bytes written and implicitly performs
647         * run-length encoding as the first step of the bzip2 algorithm.
648         */
649        private void write0(int b) throws IOException {
650            if (this.currentChar != -1) {
651                b &= 0xff;
652                if (this.currentChar == b) {
653                    if (++this.runLength > 254) {
654                        writeRun();
655                        this.currentChar = -1;
656                        this.runLength = 0;
657                    }
658                    // else nothing to do
659                } else {
660                    writeRun();
661                    this.runLength = 1;
662                    this.currentChar = b;
663                }
664            } else {
665                this.currentChar = b & 0xff;
666                this.runLength++;
667            }
668        }
669    
670        private static void hbAssignCodes(final int[] code, final byte[] length,
671                                          final int minLen, final int maxLen,
672                                          final int alphaSize) {
673            int vec = 0;
674            for (int n = minLen; n <= maxLen; n++) {
675                for (int i = 0; i < alphaSize; i++) {
676                    if ((length[i] & 0xff) == n) {
677                        code[i] = vec;
678                        vec++;
679                    }
680                }
681                vec <<= 1;
682            }
683        }
684    
685        private void bsFinishedWithStream() throws IOException {
686            while (this.bsLive > 0) {
687                int ch = this.bsBuff >> 24;
688                this.out.write(ch); // write 8-bit
689                this.bsBuff <<= 8;
690                this.bsLive -= 8;
691            }
692        }
693    
694        private void bsW(final int n, final int v) throws IOException {
695            final OutputStream outShadow = this.out;
696            int bsLiveShadow = this.bsLive;
697            int bsBuffShadow = this.bsBuff;
698    
699            while (bsLiveShadow >= 8) {
700                outShadow.write(bsBuffShadow >> 24); // write 8-bit
701                bsBuffShadow <<= 8;
702                bsLiveShadow -= 8;
703            }
704    
705            this.bsBuff = bsBuffShadow | (v << (32 - bsLiveShadow - n));
706            this.bsLive = bsLiveShadow + n;
707        }
708    
709        private void bsPutUByte(final int c) throws IOException {
710            bsW(8, c);
711        }
712    
713        private void bsPutInt(final int u) throws IOException {
714            bsW(8, (u >> 24) & 0xff);
715            bsW(8, (u >> 16) & 0xff);
716            bsW(8, (u >> 8) & 0xff);
717            bsW(8, u & 0xff);
718        }
719    
720        private void sendMTFValues() throws IOException {
721            final byte[][] len = this.data.sendMTFValues_len;
722            final int alphaSize = this.nInUse + 2;
723    
724            for (int t = N_GROUPS; --t >= 0;) {
725                byte[] len_t = len[t];
726                for (int v = alphaSize; --v >= 0;) {
727                    len_t[v] = GREATER_ICOST;
728                }
729            }
730    
731            /* Decide how many coding tables to use */
732            // assert (this.nMTF > 0) : this.nMTF;
733            final int nGroups = (this.nMTF < 200) ? 2 : (this.nMTF < 600) ? 3
734                : (this.nMTF < 1200) ? 4 : (this.nMTF < 2400) ? 5 : 6;
735    
736            /* Generate an initial set of coding tables */
737            sendMTFValues0(nGroups, alphaSize);
738    
739            /*
740             * Iterate up to N_ITERS times to improve the tables.
741             */
742            final int nSelectors = sendMTFValues1(nGroups, alphaSize);
743    
744            /* Compute MTF values for the selectors. */
745            sendMTFValues2(nGroups, nSelectors);
746    
747            /* Assign actual codes for the tables. */
748            sendMTFValues3(nGroups, alphaSize);
749    
750            /* Transmit the mapping table. */
751            sendMTFValues4();
752    
753            /* Now the selectors. */
754            sendMTFValues5(nGroups, nSelectors);
755    
756            /* Now the coding tables. */
757            sendMTFValues6(nGroups, alphaSize);
758    
759            /* And finally, the block data proper */
760            sendMTFValues7();
761        }
762    
763        private void sendMTFValues0(final int nGroups, final int alphaSize) {
764            final byte[][] len = this.data.sendMTFValues_len;
765            final int[] mtfFreq = this.data.mtfFreq;
766    
767            int remF = this.nMTF;
768            int gs = 0;
769    
770            for (int nPart = nGroups; nPart > 0; nPart--) {
771                final int tFreq = remF / nPart;
772                int ge = gs - 1;
773                int aFreq = 0;
774    
775                for (final int a = alphaSize - 1; (aFreq < tFreq) && (ge < a);) {
776                    aFreq += mtfFreq[++ge];
777                }
778    
779                if ((ge > gs) && (nPart != nGroups) && (nPart != 1)
780                    && (((nGroups - nPart) & 1) != 0)) {
781                    aFreq -= mtfFreq[ge--];
782                }
783    
784                final byte[] len_np = len[nPart - 1];
785                for (int v = alphaSize; --v >= 0;) {
786                    if ((v >= gs) && (v <= ge)) {
787                        len_np[v] = LESSER_ICOST;
788                    } else {
789                        len_np[v] = GREATER_ICOST;
790                    }
791                }
792    
793                gs = ge + 1;
794                remF -= aFreq;
795            }
796        }
797    
798        private int sendMTFValues1(final int nGroups, final int alphaSize) {
799            final Data dataShadow = this.data;
800            final int[][] rfreq = dataShadow.sendMTFValues_rfreq;
801            final int[] fave = dataShadow.sendMTFValues_fave;
802            final short[] cost = dataShadow.sendMTFValues_cost;
803            final char[] sfmap = dataShadow.sfmap;
804            final byte[] selector = dataShadow.selector;
805            final byte[][] len = dataShadow.sendMTFValues_len;
806            final byte[] len_0 = len[0];
807            final byte[] len_1 = len[1];
808            final byte[] len_2 = len[2];
809            final byte[] len_3 = len[3];
810            final byte[] len_4 = len[4];
811            final byte[] len_5 = len[5];
812            final int nMTFShadow = this.nMTF;
813    
814            int nSelectors = 0;
815    
816            for (int iter = 0; iter < N_ITERS; iter++) {
817                for (int t = nGroups; --t >= 0;) {
818                    fave[t] = 0;
819                    int[] rfreqt = rfreq[t];
820                    for (int i = alphaSize; --i >= 0;) {
821                        rfreqt[i] = 0;
822                    }
823                }
824    
825                nSelectors = 0;
826    
827                for (int gs = 0; gs < this.nMTF;) {
828                    /* Set group start & end marks. */
829    
830                    /*
831                     * Calculate the cost of this group as coded by each of the
832                     * coding tables.
833                     */
834    
835                    final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1);
836    
837                    if (nGroups == N_GROUPS) {
838                        // unrolled version of the else-block
839    
840                        short cost0 = 0;
841                        short cost1 = 0;
842                        short cost2 = 0;
843                        short cost3 = 0;
844                        short cost4 = 0;
845                        short cost5 = 0;
846    
847                        for (int i = gs; i <= ge; i++) {
848                            final int icv = sfmap[i];
849                            cost0 += len_0[icv] & 0xff;
850                            cost1 += len_1[icv] & 0xff;
851                            cost2 += len_2[icv] & 0xff;
852                            cost3 += len_3[icv] & 0xff;
853                            cost4 += len_4[icv] & 0xff;
854                            cost5 += len_5[icv] & 0xff;
855                        }
856    
857                        cost[0] = cost0;
858                        cost[1] = cost1;
859                        cost[2] = cost2;
860                        cost[3] = cost3;
861                        cost[4] = cost4;
862                        cost[5] = cost5;
863    
864                    } else {
865                        for (int t = nGroups; --t >= 0;) {
866                            cost[t] = 0;
867                        }
868    
869                        for (int i = gs; i <= ge; i++) {
870                            final int icv = sfmap[i];
871                            for (int t = nGroups; --t >= 0;) {
872                                cost[t] += len[t][icv] & 0xff;
873                            }
874                        }
875                    }
876    
877                    /*
878                     * Find the coding table which is best for this group, and
879                     * record its identity in the selector table.
880                     */
881                    int bt = -1;
882                    for (int t = nGroups, bc = 999999999; --t >= 0;) {
883                        final int cost_t = cost[t];
884                        if (cost_t < bc) {
885                            bc = cost_t;
886                            bt = t;
887                        }
888                    }
889    
890                    fave[bt]++;
891                    selector[nSelectors] = (byte) bt;
892                    nSelectors++;
893    
894                    /*
895                     * Increment the symbol frequencies for the selected table.
896                     */
897                    final int[] rfreq_bt = rfreq[bt];
898                    for (int i = gs; i <= ge; i++) {
899                        rfreq_bt[sfmap[i]]++;
900                    }
901    
902                    gs = ge + 1;
903                }
904    
905                /*
906                 * Recompute the tables based on the accumulated frequencies.
907                 */
908                for (int t = 0; t < nGroups; t++) {
909                    hbMakeCodeLengths(len[t], rfreq[t], this.data, alphaSize, 20);
910                }
911            }
912    
913            return nSelectors;
914        }
915    
916        private void sendMTFValues2(final int nGroups, final int nSelectors) {
917            // assert (nGroups < 8) : nGroups;
918    
919            final Data dataShadow = this.data;
920            byte[] pos = dataShadow.sendMTFValues2_pos;
921    
922            for (int i = nGroups; --i >= 0;) {
923                pos[i] = (byte) i;
924            }
925    
926            for (int i = 0; i < nSelectors; i++) {
927                final byte ll_i = dataShadow.selector[i];
928                byte tmp = pos[0];
929                int j = 0;
930    
931                while (ll_i != tmp) {
932                    j++;
933                    byte tmp2 = tmp;
934                    tmp = pos[j];
935                    pos[j] = tmp2;
936                }
937    
938                pos[0] = tmp;
939                dataShadow.selectorMtf[i] = (byte) j;
940            }
941        }
942    
943        private void sendMTFValues3(final int nGroups, final int alphaSize) {
944            int[][] code = this.data.sendMTFValues_code;
945            byte[][] len = this.data.sendMTFValues_len;
946    
947            for (int t = 0; t < nGroups; t++) {
948                int minLen = 32;
949                int maxLen = 0;
950                final byte[] len_t = len[t];
951                for (int i = alphaSize; --i >= 0;) {
952                    final int l = len_t[i] & 0xff;
953                    if (l > maxLen) {
954                        maxLen = l;
955                    }
956                    if (l < minLen) {
957                        minLen = l;
958                    }
959                }
960    
961                // assert (maxLen <= 20) : maxLen;
962                // assert (minLen >= 1) : minLen;
963    
964                hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize);
965            }
966        }
967    
968        private void sendMTFValues4() throws IOException {
969            final boolean[] inUse = this.data.inUse;
970            final boolean[] inUse16 = this.data.sentMTFValues4_inUse16;
971    
972            for (int i = 16; --i >= 0;) {
973                inUse16[i] = false;
974                final int i16 = i * 16;
975                for (int j = 16; --j >= 0;) {
976                    if (inUse[i16 + j]) {
977                        inUse16[i] = true;
978                    }
979                }
980            }
981    
982            for (int i = 0; i < 16; i++) {
983                bsW(1, inUse16[i] ? 1 : 0);
984            }
985    
986            final OutputStream outShadow = this.out;
987            int bsLiveShadow = this.bsLive;
988            int bsBuffShadow = this.bsBuff;
989    
990            for (int i = 0; i < 16; i++) {
991                if (inUse16[i]) {
992                    final int i16 = i * 16;
993                    for (int j = 0; j < 16; j++) {
994                        // inlined: bsW(1, inUse[i16 + j] ? 1 : 0);
995                        while (bsLiveShadow >= 8) {
996                            outShadow.write(bsBuffShadow >> 24); // write 8-bit
997                            bsBuffShadow <<= 8;
998                            bsLiveShadow -= 8;
999                        }
1000                        if (inUse[i16 + j]) {
1001                            bsBuffShadow |= 1 << (32 - bsLiveShadow - 1);
1002                        }
1003                        bsLiveShadow++;
1004                    }
1005                }
1006            }
1007    
1008            this.bsBuff = bsBuffShadow;
1009            this.bsLive = bsLiveShadow;
1010        }
1011    
1012        private void sendMTFValues5(final int nGroups, final int nSelectors)
1013            throws IOException {
1014            bsW(3, nGroups);
1015            bsW(15, nSelectors);
1016    
1017            final OutputStream outShadow = this.out;
1018            final byte[] selectorMtf = this.data.selectorMtf;
1019    
1020            int bsLiveShadow = this.bsLive;
1021            int bsBuffShadow = this.bsBuff;
1022    
1023            for (int i = 0; i < nSelectors; i++) {
1024                for (int j = 0, hj = selectorMtf[i] & 0xff; j < hj; j++) {
1025                    // inlined: bsW(1, 1);
1026                    while (bsLiveShadow >= 8) {
1027                        outShadow.write(bsBuffShadow >> 24);
1028                        bsBuffShadow <<= 8;
1029                        bsLiveShadow -= 8;
1030                    }
1031                    bsBuffShadow |= 1 << (32 - bsLiveShadow - 1);
1032                    bsLiveShadow++;
1033                }
1034    
1035                // inlined: bsW(1, 0);
1036                while (bsLiveShadow >= 8) {
1037                    outShadow.write(bsBuffShadow >> 24);
1038                    bsBuffShadow <<= 8;
1039                    bsLiveShadow -= 8;
1040                }
1041                // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
1042                bsLiveShadow++;
1043            }
1044    
1045            this.bsBuff = bsBuffShadow;
1046            this.bsLive = bsLiveShadow;
1047        }
1048    
1049        private void sendMTFValues6(final int nGroups, final int alphaSize)
1050            throws IOException {
1051            final byte[][] len = this.data.sendMTFValues_len;
1052            final OutputStream outShadow = this.out;
1053    
1054            int bsLiveShadow = this.bsLive;
1055            int bsBuffShadow = this.bsBuff;
1056    
1057            for (int t = 0; t < nGroups; t++) {
1058                byte[] len_t = len[t];
1059                int curr = len_t[0] & 0xff;
1060    
1061                // inlined: bsW(5, curr);
1062                while (bsLiveShadow >= 8) {
1063                    outShadow.write(bsBuffShadow >> 24); // write 8-bit
1064                    bsBuffShadow <<= 8;
1065                    bsLiveShadow -= 8;
1066                }
1067                bsBuffShadow |= curr << (32 - bsLiveShadow - 5);
1068                bsLiveShadow += 5;
1069    
1070                for (int i = 0; i < alphaSize; i++) {
1071                    int lti = len_t[i] & 0xff;
1072                    while (curr < lti) {
1073                        // inlined: bsW(2, 2);
1074                        while (bsLiveShadow >= 8) {
1075                            outShadow.write(bsBuffShadow >> 24); // write 8-bit
1076                            bsBuffShadow <<= 8;
1077                            bsLiveShadow -= 8;
1078                        }
1079                        bsBuffShadow |= 2 << (32 - bsLiveShadow - 2);
1080                        bsLiveShadow += 2;
1081    
1082                        curr++; /* 10 */
1083                    }
1084    
1085                    while (curr > lti) {
1086                        // inlined: bsW(2, 3);
1087                        while (bsLiveShadow >= 8) {
1088                            outShadow.write(bsBuffShadow >> 24); // write 8-bit
1089                            bsBuffShadow <<= 8;
1090                            bsLiveShadow -= 8;
1091                        }
1092                        bsBuffShadow |= 3 << (32 - bsLiveShadow - 2);
1093                        bsLiveShadow += 2;
1094    
1095                        curr--; /* 11 */
1096                    }
1097    
1098                    // inlined: bsW(1, 0);
1099                    while (bsLiveShadow >= 8) {
1100                        outShadow.write(bsBuffShadow >> 24); // write 8-bit
1101                        bsBuffShadow <<= 8;
1102                        bsLiveShadow -= 8;
1103                    }
1104                    // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
1105                    bsLiveShadow++;
1106                }
1107            }
1108    
1109            this.bsBuff = bsBuffShadow;
1110            this.bsLive = bsLiveShadow;
1111        }
1112    
1113        private void sendMTFValues7() throws IOException {
1114            final Data dataShadow = this.data;
1115            final byte[][] len = dataShadow.sendMTFValues_len;
1116            final int[][] code = dataShadow.sendMTFValues_code;
1117            final OutputStream outShadow = this.out;
1118            final byte[] selector = dataShadow.selector;
1119            final char[] sfmap = dataShadow.sfmap;
1120            final int nMTFShadow = this.nMTF;
1121    
1122            int selCtr = 0;
1123    
1124            int bsLiveShadow = this.bsLive;
1125            int bsBuffShadow = this.bsBuff;
1126    
1127            for (int gs = 0; gs < nMTFShadow;) {
1128                final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1);
1129                final int selector_selCtr = selector[selCtr] & 0xff;
1130                final int[] code_selCtr = code[selector_selCtr];
1131                final byte[] len_selCtr = len[selector_selCtr];
1132    
1133                while (gs <= ge) {
1134                    final int sfmap_i = sfmap[gs];
1135    
1136                    //
1137                    // inlined: bsW(len_selCtr[sfmap_i] & 0xff,
1138                    // code_selCtr[sfmap_i]);
1139                    //
1140                    while (bsLiveShadow >= 8) {
1141                        outShadow.write(bsBuffShadow >> 24);
1142                        bsBuffShadow <<= 8;
1143                        bsLiveShadow -= 8;
1144                    }
1145                    final int n = len_selCtr[sfmap_i] & 0xFF;
1146                    bsBuffShadow |= code_selCtr[sfmap_i] << (32 - bsLiveShadow - n);
1147                    bsLiveShadow += n;
1148    
1149                    gs++;
1150                }
1151    
1152                gs = ge + 1;
1153                selCtr++;
1154            }
1155    
1156            this.bsBuff = bsBuffShadow;
1157            this.bsLive = bsLiveShadow;
1158        }
1159    
1160        private void moveToFrontCodeAndSend() throws IOException {
1161            bsW(24, this.data.origPtr);
1162            generateMTFValues();
1163            sendMTFValues();
1164        }
1165    
1166        private void blockSort() {
1167            blockSorter.blockSort(data, last);
1168        }
1169    
1170        /*
1171         * Performs Move-To-Front on the Burrows-Wheeler transformed
1172         * buffer, storing the MTFed data in data.sfmap in RUNA/RUNB
1173         * run-length-encoded form.
1174         *
1175         * <p>Keeps track of byte frequencies in data.mtfFreq at the same time.</p>
1176         */
1177        private void generateMTFValues() {
1178            final int lastShadow = this.last;
1179            final Data dataShadow = this.data;
1180            final boolean[] inUse = dataShadow.inUse;
1181            final byte[] block = dataShadow.block;
1182            final int[] fmap = dataShadow.fmap;
1183            final char[] sfmap = dataShadow.sfmap;
1184            final int[] mtfFreq = dataShadow.mtfFreq;
1185            final byte[] unseqToSeq = dataShadow.unseqToSeq;
1186            final byte[] yy = dataShadow.generateMTFValues_yy;
1187    
1188            // make maps
1189            int nInUseShadow = 0;
1190            for (int i = 0; i < 256; i++) {
1191                if (inUse[i]) {
1192                    unseqToSeq[i] = (byte) nInUseShadow;
1193                    nInUseShadow++;
1194                }
1195            }
1196            this.nInUse = nInUseShadow;
1197    
1198            final int eob = nInUseShadow + 1;
1199    
1200            for (int i = eob; i >= 0; i--) {
1201                mtfFreq[i] = 0;
1202            }
1203    
1204            for (int i = nInUseShadow; --i >= 0;) {
1205                yy[i] = (byte) i;
1206            }
1207    
1208            int wr = 0;
1209            int zPend = 0;
1210    
1211            for (int i = 0; i <= lastShadow; i++) {
1212                final byte ll_i = unseqToSeq[block[fmap[i]] & 0xff];
1213                byte tmp = yy[0];
1214                int j = 0;
1215    
1216                while (ll_i != tmp) {
1217                    j++;
1218                    byte tmp2 = tmp;
1219                    tmp = yy[j];
1220                    yy[j] = tmp2;
1221                }
1222                yy[0] = tmp;
1223    
1224                if (j == 0) {
1225                    zPend++;
1226                } else {
1227                    if (zPend > 0) {
1228                        zPend--;
1229                        while (true) {
1230                            if ((zPend & 1) == 0) {
1231                                sfmap[wr] = RUNA;
1232                                wr++;
1233                                mtfFreq[RUNA]++;
1234                            } else {
1235                                sfmap[wr] = RUNB;
1236                                wr++;
1237                                mtfFreq[RUNB]++;
1238                            }
1239    
1240                            if (zPend >= 2) {
1241                                zPend = (zPend - 2) >> 1;
1242                            } else {
1243                                break;
1244                            }
1245                        }
1246                        zPend = 0;
1247                    }
1248                    sfmap[wr] = (char) (j + 1);
1249                    wr++;
1250                    mtfFreq[j + 1]++;
1251                }
1252            }
1253    
1254            if (zPend > 0) {
1255                zPend--;
1256                while (true) {
1257                    if ((zPend & 1) == 0) {
1258                        sfmap[wr] = RUNA;
1259                        wr++;
1260                        mtfFreq[RUNA]++;
1261                    } else {
1262                        sfmap[wr] = RUNB;
1263                        wr++;
1264                        mtfFreq[RUNB]++;
1265                    }
1266    
1267                    if (zPend >= 2) {
1268                        zPend = (zPend - 2) >> 1;
1269                    } else {
1270                        break;
1271                    }
1272                }
1273            }
1274    
1275            sfmap[wr] = (char) eob;
1276            mtfFreq[eob]++;
1277            this.nMTF = wr + 1;
1278        }
1279    
1280        static final class Data extends Object {
1281    
1282            // with blockSize 900k
1283            /* maps unsigned byte => "does it occur in block" */
1284            final boolean[] inUse = new boolean[256]; // 256 byte
1285            final byte[] unseqToSeq = new byte[256]; // 256 byte
1286            final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte
1287            final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte
1288            final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte
1289    
1290            final byte[] generateMTFValues_yy = new byte[256]; // 256 byte
1291            final byte[][] sendMTFValues_len = new byte[N_GROUPS][MAX_ALPHA_SIZE]; // 1548
1292            // byte
1293            final int[][] sendMTFValues_rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192
1294            // byte
1295            final int[] sendMTFValues_fave = new int[N_GROUPS]; // 24 byte
1296            final short[] sendMTFValues_cost = new short[N_GROUPS]; // 12 byte
1297            final int[][] sendMTFValues_code = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192
1298            // byte
1299            final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte
1300            final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte
1301    
1302            final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte
1303            final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
1304            final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
1305    
1306            // ------------
1307            // 333408 byte
1308    
1309            /* holds the RLEd block of original data starting at index 1.
1310             * After sorting the last byte added to the buffer is at index
1311             * 0. */
1312            final byte[] block; // 900021 byte
1313            /* maps index in Burrows-Wheeler transformed block => index of
1314             * byte in original block */
1315            final int[] fmap; // 3600000 byte
1316            final char[] sfmap; // 3600000 byte
1317            // ------------
1318            // 8433529 byte
1319            // ============
1320    
1321            /**
1322             * Index of original line in Burrows-Wheeler table.
1323             *
1324             * <p>This is the index in fmap that points to the last byte
1325             * of the original data.</p>
1326             */
1327            int origPtr;
1328    
1329            Data(int blockSize100k) {
1330                super();
1331    
1332                final int n = blockSize100k * BZip2Constants.BASEBLOCKSIZE;
1333                this.block = new byte[(n + 1 + NUM_OVERSHOOT_BYTES)];
1334                this.fmap = new int[n];
1335                this.sfmap = new char[2 * n];
1336            }
1337    
1338        }
1339    
1340    }