001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     * http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing,
013     * software distributed under the License is distributed on an
014     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015     * KIND, either express or implied.  See the License for the
016     * specific language governing permissions and limitations
017     * under the License.
018     */
019    package org.apache.commons.compress.archivers.tar;
020    
021    import java.io.File;
022    import java.io.IOException;
023    import java.io.OutputStream;
024    import java.io.StringWriter;
025    import java.util.HashMap;
026    import java.util.Map;
027    import org.apache.commons.compress.archivers.ArchiveEntry;
028    import org.apache.commons.compress.archivers.ArchiveOutputStream;
029    import org.apache.commons.compress.archivers.zip.ZipEncoding;
030    import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
031    import org.apache.commons.compress.utils.CharsetNames;
032    import org.apache.commons.compress.utils.CountingOutputStream;
033    
034    /**
035     * The TarOutputStream writes a UNIX tar archive as an OutputStream.
036     * Methods are provided to put entries, and then write their contents
037     * by writing to this stream using write().
038     * @NotThreadSafe
039     */
040    public class TarArchiveOutputStream extends ArchiveOutputStream {
041        /** Fail if a long file name is required in the archive. */
042        public static final int LONGFILE_ERROR = 0;
043    
044        /** Long paths will be truncated in the archive. */
045        public static final int LONGFILE_TRUNCATE = 1;
046    
047        /** GNU tar extensions are used to store long file names in the archive. */
048        public static final int LONGFILE_GNU = 2;
049    
050        /** POSIX/PAX extensions are used to store long file names in the archive. */
051        public static final int LONGFILE_POSIX = 3;
052    
053        /** Fail if a big number (e.g. size > 8GiB) is required in the archive. */
054        public static final int BIGNUMBER_ERROR = 0;
055    
056        /** star/GNU tar/BSD tar extensions are used to store big number in the archive. */
057        public static final int BIGNUMBER_STAR = 1;
058    
059        /** POSIX/PAX extensions are used to store big numbers in the archive. */
060        public static final int BIGNUMBER_POSIX = 2;
061    
062        private long      currSize;
063        private String    currName;
064        private long      currBytes;
065        private final byte[]    recordBuf;
066        private int       assemLen;
067        private final byte[]    assemBuf;
068        protected final TarBuffer buffer;
069        private int       longFileMode = LONGFILE_ERROR;
070        private int       bigNumberMode = BIGNUMBER_ERROR;
071    
072        private boolean closed = false;
073    
074        /** Indicates if putArchiveEntry has been called without closeArchiveEntry */
075        private boolean haveUnclosedEntry = false;
076    
077        /** indicates if this archive is finished */
078        private boolean finished = false;
079    
080        private final OutputStream out;
081    
082        private final ZipEncoding encoding;
083    
084        private boolean addPaxHeadersForNonAsciiNames = false;
085        private static final ZipEncoding ASCII =
086            ZipEncodingHelper.getZipEncoding("ASCII");
087    
088        /**
089         * Constructor for TarInputStream.
090         * @param os the output stream to use
091         */
092        public TarArchiveOutputStream(OutputStream os) {
093            this(os, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE);
094        }
095    
096        /**
097         * Constructor for TarInputStream.
098         * @param os the output stream to use
099         * @param encoding name of the encoding to use for file names
100         * @since Commons Compress 1.4
101         */
102        public TarArchiveOutputStream(OutputStream os, String encoding) {
103            this(os, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE, encoding);
104        }
105    
106        /**
107         * Constructor for TarInputStream.
108         * @param os the output stream to use
109         * @param blockSize the block size to use
110         */
111        public TarArchiveOutputStream(OutputStream os, int blockSize) {
112            this(os, blockSize, TarBuffer.DEFAULT_RCDSIZE);
113        }
114    
115        /**
116         * Constructor for TarInputStream.
117         * @param os the output stream to use
118         * @param blockSize the block size to use
119         * @param encoding name of the encoding to use for file names
120         * @since Commons Compress 1.4
121         */
122        public TarArchiveOutputStream(OutputStream os, int blockSize,
123                                      String encoding) {
124            this(os, blockSize, TarBuffer.DEFAULT_RCDSIZE, encoding);
125        }
126    
127        /**
128         * Constructor for TarInputStream.
129         * @param os the output stream to use
130         * @param blockSize the block size to use
131         * @param recordSize the record size to use
132         */
133        public TarArchiveOutputStream(OutputStream os, int blockSize, int recordSize) {
134            this(os, blockSize, recordSize, null);
135        }
136    
137        /**
138         * Constructor for TarInputStream.
139         * @param os the output stream to use
140         * @param blockSize the block size to use
141         * @param recordSize the record size to use
142         * @param encoding name of the encoding to use for file names
143         * @since Commons Compress 1.4
144         */
145        public TarArchiveOutputStream(OutputStream os, int blockSize,
146                                      int recordSize, String encoding) {
147            out = new CountingOutputStream(os);
148            this.encoding = ZipEncodingHelper.getZipEncoding(encoding);
149    
150            this.buffer = new TarBuffer(out, blockSize, recordSize);
151            this.assemLen = 0;
152            this.assemBuf = new byte[recordSize];
153            this.recordBuf = new byte[recordSize];
154        }
155    
156        /**
157         * Set the long file mode.
158         * This can be LONGFILE_ERROR(0), LONGFILE_TRUNCATE(1) or LONGFILE_GNU(2).
159         * This specifies the treatment of long file names (names >= TarConstants.NAMELEN).
160         * Default is LONGFILE_ERROR.
161         * @param longFileMode the mode to use
162         */
163        public void setLongFileMode(int longFileMode) {
164            this.longFileMode = longFileMode;
165        }
166    
167        /**
168         * Set the big number mode.
169         * This can be BIGNUMBER_ERROR(0), BIGNUMBER_POSIX(1) or BIGNUMBER_STAR(2).
170         * This specifies the treatment of big files (sizes > TarConstants.MAXSIZE) and other numeric values to big to fit into a traditional tar header.
171         * Default is BIGNUMBER_ERROR.
172         * @param bigNumberMode the mode to use
173         * @since 1.4
174         */
175        public void setBigNumberMode(int bigNumberMode) {
176            this.bigNumberMode = bigNumberMode;
177        }
178    
179        /**
180         * Whether to add a PAX extension header for non-ASCII file names.
181         * @since 1.4
182         */
183        public void setAddPaxHeadersForNonAsciiNames(boolean b) {
184            addPaxHeadersForNonAsciiNames = b;
185        }
186    
187        @Deprecated
188        @Override
189        public int getCount() {
190            return (int) getBytesWritten();
191        }
192    
193        @Override
194        public long getBytesWritten() {
195            return ((CountingOutputStream) out).getBytesWritten();
196        }
197    
198        /**
199         * Ends the TAR archive without closing the underlying OutputStream.
200         * 
201         * An archive consists of a series of file entries terminated by an
202         * end-of-archive entry, which consists of two 512 blocks of zero bytes. 
203         * POSIX.1 requires two EOF records, like some other implementations.
204         * 
205         * @throws IOException on error
206         */
207        @Override
208        public void finish() throws IOException {
209            if (finished) {
210                throw new IOException("This archive has already been finished");
211            }
212    
213            if(haveUnclosedEntry) {
214                throw new IOException("This archives contains unclosed entries.");
215            }
216            writeEOFRecord();
217            writeEOFRecord();
218            buffer.flushBlock();
219            finished = true;
220        }
221    
222        /**
223         * Closes the underlying OutputStream.
224         * @throws IOException on error
225         */
226        @Override
227        public void close() throws IOException {
228            if(!finished) {
229                finish();
230            }
231    
232            if (!closed) {
233                buffer.close();
234                out.close();
235                closed = true;
236            }
237        }
238    
239        /**
240         * Get the record size being used by this stream's TarBuffer.
241         *
242         * @return The TarBuffer record size.
243         */
244        public int getRecordSize() {
245            return buffer.getRecordSize();
246        }
247    
248        /**
249         * Put an entry on the output stream. This writes the entry's
250         * header record and positions the output stream for writing
251         * the contents of the entry. Once this method is called, the
252         * stream is ready for calls to write() to write the entry's
253         * contents. Once the contents are written, closeArchiveEntry()
254         * <B>MUST</B> be called to ensure that all buffered data
255         * is completely written to the output stream.
256         *
257         * @param archiveEntry The TarEntry to be written to the archive.
258         * @throws IOException on error
259         * @throws ClassCastException if archiveEntry is not an instance of TarArchiveEntry
260         */
261        @Override
262        public void putArchiveEntry(ArchiveEntry archiveEntry) throws IOException {
263            if(finished) {
264                throw new IOException("Stream has already been finished");
265            }
266            TarArchiveEntry entry = (TarArchiveEntry) archiveEntry;
267            Map<String, String> paxHeaders = new HashMap<String, String>();
268            final String entryName = entry.getName();
269            final byte[] nameBytes = encoding.encode(entryName).array();
270            boolean paxHeaderContainsPath = false;
271            if (nameBytes.length >= TarConstants.NAMELEN) {
272    
273                if (longFileMode == LONGFILE_POSIX) {
274                    paxHeaders.put("path", entryName);
275                    paxHeaderContainsPath = true;
276                } else if (longFileMode == LONGFILE_GNU) {
277                    // create a TarEntry for the LongLink, the contents
278                    // of which are the entry's name
279                    TarArchiveEntry longLinkEntry = new TarArchiveEntry(TarConstants.GNU_LONGLINK,
280                                                                        TarConstants.LF_GNUTYPE_LONGNAME);
281    
282                    longLinkEntry.setSize(nameBytes.length + 1); // +1 for NUL
283                    putArchiveEntry(longLinkEntry);
284                    write(nameBytes);
285                    write(0); // NUL terminator
286                    closeArchiveEntry();
287                } else if (longFileMode != LONGFILE_TRUNCATE) {
288                    throw new RuntimeException("file name '" + entryName
289                                               + "' is too long ( > "
290                                               + TarConstants.NAMELEN + " bytes)");
291                }
292            }
293    
294            if (bigNumberMode == BIGNUMBER_POSIX) {
295                addPaxHeadersForBigNumbers(paxHeaders, entry);
296            } else if (bigNumberMode != BIGNUMBER_STAR) {
297                failForBigNumbers(entry);
298            }
299    
300            if (addPaxHeadersForNonAsciiNames && !paxHeaderContainsPath
301                && !ASCII.canEncode(entryName)) {
302                paxHeaders.put("path", entryName);
303            }
304    
305            if (addPaxHeadersForNonAsciiNames
306                && (entry.isLink() || entry.isSymbolicLink())
307                && !ASCII.canEncode(entry.getLinkName())) {
308                paxHeaders.put("linkpath", entry.getLinkName());
309            }
310    
311            if (paxHeaders.size() > 0) {
312                writePaxHeaders(entryName, paxHeaders);
313            }
314    
315            entry.writeEntryHeader(recordBuf, encoding,
316                                   bigNumberMode == BIGNUMBER_STAR);
317            buffer.writeRecord(recordBuf);
318    
319            currBytes = 0;
320    
321            if (entry.isDirectory()) {
322                currSize = 0;
323            } else {
324                currSize = entry.getSize();
325            }
326            currName = entryName;
327            haveUnclosedEntry = true;
328        }
329    
330        /**
331         * Close an entry. This method MUST be called for all file
332         * entries that contain data. The reason is that we must
333         * buffer data written to the stream in order to satisfy
334         * the buffer's record based writes. Thus, there may be
335         * data fragments still being assembled that must be written
336         * to the output stream before this entry is closed and the
337         * next entry written.
338         * @throws IOException on error
339         */
340        @Override
341        public void closeArchiveEntry() throws IOException {
342            if(finished) {
343                throw new IOException("Stream has already been finished");
344            }
345            if (!haveUnclosedEntry){
346                throw new IOException("No current entry to close");
347            }
348            if (assemLen > 0) {
349                for (int i = assemLen; i < assemBuf.length; ++i) {
350                    assemBuf[i] = 0;
351                }
352    
353                buffer.writeRecord(assemBuf);
354    
355                currBytes += assemLen;
356                assemLen = 0;
357            }
358    
359            if (currBytes < currSize) {
360                throw new IOException("entry '" + currName + "' closed at '"
361                                      + currBytes
362                                      + "' before the '" + currSize
363                                      + "' bytes specified in the header were written");
364            }
365            haveUnclosedEntry = false;
366        }
367    
368        /**
369         * Writes bytes to the current tar archive entry. This method
370         * is aware of the current entry and will throw an exception if
371         * you attempt to write bytes past the length specified for the
372         * current entry. The method is also (painfully) aware of the
373         * record buffering required by TarBuffer, and manages buffers
374         * that are not a multiple of recordsize in length, including
375         * assembling records from small buffers.
376         *
377         * @param wBuf The buffer to write to the archive.
378         * @param wOffset The offset in the buffer from which to get bytes.
379         * @param numToWrite The number of bytes to write.
380         * @throws IOException on error
381         */
382        @Override
383        public void write(byte[] wBuf, int wOffset, int numToWrite) throws IOException {
384            if ((currBytes + numToWrite) > currSize) {
385                throw new IOException("request to write '" + numToWrite
386                                      + "' bytes exceeds size in header of '"
387                                      + currSize + "' bytes for entry '"
388                                      + currName + "'");
389    
390                //
391                // We have to deal with assembly!!!
392                // The programmer can be writing little 32 byte chunks for all
393                // we know, and we must assemble complete records for writing.
394                // REVIEW Maybe this should be in TarBuffer? Could that help to
395                // eliminate some of the buffer copying.
396                //
397            }
398    
399            if (assemLen > 0) {
400                if ((assemLen + numToWrite) >= recordBuf.length) {
401                    int aLen = recordBuf.length - assemLen;
402    
403                    System.arraycopy(assemBuf, 0, recordBuf, 0,
404                                     assemLen);
405                    System.arraycopy(wBuf, wOffset, recordBuf,
406                                     assemLen, aLen);
407                    buffer.writeRecord(recordBuf);
408    
409                    currBytes += recordBuf.length;
410                    wOffset += aLen;
411                    numToWrite -= aLen;
412                    assemLen = 0;
413                } else {
414                    System.arraycopy(wBuf, wOffset, assemBuf, assemLen,
415                                     numToWrite);
416    
417                    wOffset += numToWrite;
418                    assemLen += numToWrite;
419                    numToWrite = 0;
420                }
421            }
422    
423            //
424            // When we get here we have EITHER:
425            // o An empty "assemble" buffer.
426            // o No bytes to write (numToWrite == 0)
427            //
428            while (numToWrite > 0) {
429                if (numToWrite < recordBuf.length) {
430                    System.arraycopy(wBuf, wOffset, assemBuf, assemLen,
431                                     numToWrite);
432    
433                    assemLen += numToWrite;
434    
435                    break;
436                }
437    
438                buffer.writeRecord(wBuf, wOffset);
439    
440                int num = recordBuf.length;
441    
442                currBytes += num;
443                numToWrite -= num;
444                wOffset += num;
445            }
446        }
447    
448        /**
449         * Writes a PAX extended header with the given map as contents.
450         * @since 1.4
451         */
452        void writePaxHeaders(String entryName,
453                             Map<String, String> headers) throws IOException {
454            String name = "./PaxHeaders.X/" + stripTo7Bits(entryName);
455            if (name.length() >= TarConstants.NAMELEN) {
456                name = name.substring(0, TarConstants.NAMELEN - 1);
457            }
458            TarArchiveEntry pex = new TarArchiveEntry(name,
459                                                      TarConstants.LF_PAX_EXTENDED_HEADER_LC);
460    
461            StringWriter w = new StringWriter();
462            for (Map.Entry<String, String> h : headers.entrySet()) {
463                String key = h.getKey();
464                String value = h.getValue();
465                int len = key.length() + value.length()
466                    + 3 /* blank, equals and newline */
467                    + 2 /* guess 9 < actual length < 100 */;
468                String line = len + " " + key + "=" + value + "\n";
469                int actualLength = line.getBytes(CharsetNames.UTF_8).length;
470                while (len != actualLength) {
471                    // Adjust for cases where length < 10 or > 100
472                    // or where UTF-8 encoding isn't a single octet
473                    // per character.
474                    // Must be in loop as size may go from 99 to 100 in
475                    // first pass so we'd need a second.
476                    len = actualLength;
477                    line = len + " " + key + "=" + value + "\n";
478                    actualLength = line.getBytes(CharsetNames.UTF_8).length;
479                }
480                w.write(line);
481            }
482            byte[] data = w.toString().getBytes(CharsetNames.UTF_8);
483            pex.setSize(data.length);
484            putArchiveEntry(pex);
485            write(data);
486            closeArchiveEntry();
487        }
488    
489        private String stripTo7Bits(String name) {
490            final int length = name.length();
491            StringBuffer result = new StringBuffer(length);
492            for (int i = 0; i < length; i++) {
493                char stripped = (char) (name.charAt(i) & 0x7F);
494                if (stripped != 0) { // would be read as Trailing null
495                    result.append(stripped);
496                }
497            }
498            return result.toString();
499        }
500    
501        /**
502         * Write an EOF (end of archive) record to the tar archive.
503         * An EOF record consists of a record of all zeros.
504         */
505        private void writeEOFRecord() throws IOException {
506            for (int i = 0; i < recordBuf.length; ++i) {
507                recordBuf[i] = 0;
508            }
509    
510            buffer.writeRecord(recordBuf);
511        }
512    
513        @Override
514        public void flush() throws IOException {
515            out.flush();
516        }
517    
518        /** {@inheritDoc} */
519        @Override
520        public ArchiveEntry createArchiveEntry(File inputFile, String entryName)
521                throws IOException {
522            if(finished) {
523                throw new IOException("Stream has already been finished");
524            }
525            return new TarArchiveEntry(inputFile, entryName);
526        }
527    
528        private void addPaxHeadersForBigNumbers(Map<String, String> paxHeaders,
529                                                TarArchiveEntry entry) {
530            addPaxHeaderForBigNumber(paxHeaders, "size", entry.getSize(),
531                                     TarConstants.MAXSIZE);
532            addPaxHeaderForBigNumber(paxHeaders, "gid", entry.getGroupId(),
533                                     TarConstants.MAXID);
534            addPaxHeaderForBigNumber(paxHeaders, "mtime",
535                                     entry.getModTime().getTime() / 1000,
536                                     TarConstants.MAXSIZE);
537            addPaxHeaderForBigNumber(paxHeaders, "uid", entry.getUserId(),
538                                     TarConstants.MAXID);
539            // star extensions by J\u00f6rg Schilling
540            addPaxHeaderForBigNumber(paxHeaders, "SCHILY.devmajor",
541                                     entry.getDevMajor(), TarConstants.MAXID);
542            addPaxHeaderForBigNumber(paxHeaders, "SCHILY.devminor",
543                                     entry.getDevMinor(), TarConstants.MAXID);
544            // there is no PAX header for file mode
545            failForBigNumber("mode", entry.getMode(), TarConstants.MAXID);
546        }
547    
548        private void addPaxHeaderForBigNumber(Map<String, String> paxHeaders,
549                                              String header, long value,
550                                              long maxValue) {
551            if (value < 0 || value > maxValue) {
552                paxHeaders.put(header, String.valueOf(value));
553            }
554        }
555    
556        private void failForBigNumbers(TarArchiveEntry entry) {
557            failForBigNumber("entry size", entry.getSize(), TarConstants.MAXSIZE);
558            failForBigNumber("group id", entry.getGroupId(), TarConstants.MAXID);
559            failForBigNumber("last modification time",
560                             entry.getModTime().getTime() / 1000,
561                             TarConstants.MAXSIZE);
562            failForBigNumber("user id", entry.getUserId(), TarConstants.MAXID);
563            failForBigNumber("mode", entry.getMode(), TarConstants.MAXID);
564            failForBigNumber("major device number", entry.getDevMajor(),
565                             TarConstants.MAXID);
566            failForBigNumber("minor device number", entry.getDevMinor(),
567                             TarConstants.MAXID);
568        }
569    
570        private void failForBigNumber(String field, long value, long maxValue) {
571            if (value < 0 || value > maxValue) {
572                throw new RuntimeException(field + " '" + value
573                                           + "' is too big ( > "
574                                           + maxValue + " )");
575            }
576        }
577    }