001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019 package org.apache.commons.compress.archivers.tar; 020 021 import java.io.File; 022 import java.io.IOException; 023 import java.io.OutputStream; 024 import java.io.StringWriter; 025 import java.util.HashMap; 026 import java.util.Map; 027 import org.apache.commons.compress.archivers.ArchiveEntry; 028 import org.apache.commons.compress.archivers.ArchiveOutputStream; 029 import org.apache.commons.compress.archivers.zip.ZipEncoding; 030 import org.apache.commons.compress.archivers.zip.ZipEncodingHelper; 031 import org.apache.commons.compress.utils.CharsetNames; 032 import org.apache.commons.compress.utils.CountingOutputStream; 033 034 /** 035 * The TarOutputStream writes a UNIX tar archive as an OutputStream. 036 * Methods are provided to put entries, and then write their contents 037 * by writing to this stream using write(). 038 * @NotThreadSafe 039 */ 040 public class TarArchiveOutputStream extends ArchiveOutputStream { 041 /** Fail if a long file name is required in the archive. */ 042 public static final int LONGFILE_ERROR = 0; 043 044 /** Long paths will be truncated in the archive. */ 045 public static final int LONGFILE_TRUNCATE = 1; 046 047 /** GNU tar extensions are used to store long file names in the archive. */ 048 public static final int LONGFILE_GNU = 2; 049 050 /** POSIX/PAX extensions are used to store long file names in the archive. */ 051 public static final int LONGFILE_POSIX = 3; 052 053 /** Fail if a big number (e.g. size > 8GiB) is required in the archive. */ 054 public static final int BIGNUMBER_ERROR = 0; 055 056 /** star/GNU tar/BSD tar extensions are used to store big number in the archive. */ 057 public static final int BIGNUMBER_STAR = 1; 058 059 /** POSIX/PAX extensions are used to store big numbers in the archive. */ 060 public static final int BIGNUMBER_POSIX = 2; 061 062 private long currSize; 063 private String currName; 064 private long currBytes; 065 private final byte[] recordBuf; 066 private int assemLen; 067 private final byte[] assemBuf; 068 protected final TarBuffer buffer; 069 private int longFileMode = LONGFILE_ERROR; 070 private int bigNumberMode = BIGNUMBER_ERROR; 071 072 private boolean closed = false; 073 074 /** Indicates if putArchiveEntry has been called without closeArchiveEntry */ 075 private boolean haveUnclosedEntry = false; 076 077 /** indicates if this archive is finished */ 078 private boolean finished = false; 079 080 private final OutputStream out; 081 082 private final ZipEncoding encoding; 083 084 private boolean addPaxHeadersForNonAsciiNames = false; 085 private static final ZipEncoding ASCII = 086 ZipEncodingHelper.getZipEncoding("ASCII"); 087 088 /** 089 * Constructor for TarInputStream. 090 * @param os the output stream to use 091 */ 092 public TarArchiveOutputStream(OutputStream os) { 093 this(os, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE); 094 } 095 096 /** 097 * Constructor for TarInputStream. 098 * @param os the output stream to use 099 * @param encoding name of the encoding to use for file names 100 * @since Commons Compress 1.4 101 */ 102 public TarArchiveOutputStream(OutputStream os, String encoding) { 103 this(os, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE, encoding); 104 } 105 106 /** 107 * Constructor for TarInputStream. 108 * @param os the output stream to use 109 * @param blockSize the block size to use 110 */ 111 public TarArchiveOutputStream(OutputStream os, int blockSize) { 112 this(os, blockSize, TarBuffer.DEFAULT_RCDSIZE); 113 } 114 115 /** 116 * Constructor for TarInputStream. 117 * @param os the output stream to use 118 * @param blockSize the block size to use 119 * @param encoding name of the encoding to use for file names 120 * @since Commons Compress 1.4 121 */ 122 public TarArchiveOutputStream(OutputStream os, int blockSize, 123 String encoding) { 124 this(os, blockSize, TarBuffer.DEFAULT_RCDSIZE, encoding); 125 } 126 127 /** 128 * Constructor for TarInputStream. 129 * @param os the output stream to use 130 * @param blockSize the block size to use 131 * @param recordSize the record size to use 132 */ 133 public TarArchiveOutputStream(OutputStream os, int blockSize, int recordSize) { 134 this(os, blockSize, recordSize, null); 135 } 136 137 /** 138 * Constructor for TarInputStream. 139 * @param os the output stream to use 140 * @param blockSize the block size to use 141 * @param recordSize the record size to use 142 * @param encoding name of the encoding to use for file names 143 * @since Commons Compress 1.4 144 */ 145 public TarArchiveOutputStream(OutputStream os, int blockSize, 146 int recordSize, String encoding) { 147 out = new CountingOutputStream(os); 148 this.encoding = ZipEncodingHelper.getZipEncoding(encoding); 149 150 this.buffer = new TarBuffer(out, blockSize, recordSize); 151 this.assemLen = 0; 152 this.assemBuf = new byte[recordSize]; 153 this.recordBuf = new byte[recordSize]; 154 } 155 156 /** 157 * Set the long file mode. 158 * This can be LONGFILE_ERROR(0), LONGFILE_TRUNCATE(1) or LONGFILE_GNU(2). 159 * This specifies the treatment of long file names (names >= TarConstants.NAMELEN). 160 * Default is LONGFILE_ERROR. 161 * @param longFileMode the mode to use 162 */ 163 public void setLongFileMode(int longFileMode) { 164 this.longFileMode = longFileMode; 165 } 166 167 /** 168 * Set the big number mode. 169 * This can be BIGNUMBER_ERROR(0), BIGNUMBER_POSIX(1) or BIGNUMBER_STAR(2). 170 * This specifies the treatment of big files (sizes > TarConstants.MAXSIZE) and other numeric values to big to fit into a traditional tar header. 171 * Default is BIGNUMBER_ERROR. 172 * @param bigNumberMode the mode to use 173 * @since 1.4 174 */ 175 public void setBigNumberMode(int bigNumberMode) { 176 this.bigNumberMode = bigNumberMode; 177 } 178 179 /** 180 * Whether to add a PAX extension header for non-ASCII file names. 181 * @since 1.4 182 */ 183 public void setAddPaxHeadersForNonAsciiNames(boolean b) { 184 addPaxHeadersForNonAsciiNames = b; 185 } 186 187 @Deprecated 188 @Override 189 public int getCount() { 190 return (int) getBytesWritten(); 191 } 192 193 @Override 194 public long getBytesWritten() { 195 return ((CountingOutputStream) out).getBytesWritten(); 196 } 197 198 /** 199 * Ends the TAR archive without closing the underlying OutputStream. 200 * 201 * An archive consists of a series of file entries terminated by an 202 * end-of-archive entry, which consists of two 512 blocks of zero bytes. 203 * POSIX.1 requires two EOF records, like some other implementations. 204 * 205 * @throws IOException on error 206 */ 207 @Override 208 public void finish() throws IOException { 209 if (finished) { 210 throw new IOException("This archive has already been finished"); 211 } 212 213 if(haveUnclosedEntry) { 214 throw new IOException("This archives contains unclosed entries."); 215 } 216 writeEOFRecord(); 217 writeEOFRecord(); 218 buffer.flushBlock(); 219 finished = true; 220 } 221 222 /** 223 * Closes the underlying OutputStream. 224 * @throws IOException on error 225 */ 226 @Override 227 public void close() throws IOException { 228 if(!finished) { 229 finish(); 230 } 231 232 if (!closed) { 233 buffer.close(); 234 out.close(); 235 closed = true; 236 } 237 } 238 239 /** 240 * Get the record size being used by this stream's TarBuffer. 241 * 242 * @return The TarBuffer record size. 243 */ 244 public int getRecordSize() { 245 return buffer.getRecordSize(); 246 } 247 248 /** 249 * Put an entry on the output stream. This writes the entry's 250 * header record and positions the output stream for writing 251 * the contents of the entry. Once this method is called, the 252 * stream is ready for calls to write() to write the entry's 253 * contents. Once the contents are written, closeArchiveEntry() 254 * <B>MUST</B> be called to ensure that all buffered data 255 * is completely written to the output stream. 256 * 257 * @param archiveEntry The TarEntry to be written to the archive. 258 * @throws IOException on error 259 * @throws ClassCastException if archiveEntry is not an instance of TarArchiveEntry 260 */ 261 @Override 262 public void putArchiveEntry(ArchiveEntry archiveEntry) throws IOException { 263 if(finished) { 264 throw new IOException("Stream has already been finished"); 265 } 266 TarArchiveEntry entry = (TarArchiveEntry) archiveEntry; 267 Map<String, String> paxHeaders = new HashMap<String, String>(); 268 final String entryName = entry.getName(); 269 final byte[] nameBytes = encoding.encode(entryName).array(); 270 boolean paxHeaderContainsPath = false; 271 if (nameBytes.length >= TarConstants.NAMELEN) { 272 273 if (longFileMode == LONGFILE_POSIX) { 274 paxHeaders.put("path", entryName); 275 paxHeaderContainsPath = true; 276 } else if (longFileMode == LONGFILE_GNU) { 277 // create a TarEntry for the LongLink, the contents 278 // of which are the entry's name 279 TarArchiveEntry longLinkEntry = new TarArchiveEntry(TarConstants.GNU_LONGLINK, 280 TarConstants.LF_GNUTYPE_LONGNAME); 281 282 longLinkEntry.setSize(nameBytes.length + 1); // +1 for NUL 283 putArchiveEntry(longLinkEntry); 284 write(nameBytes); 285 write(0); // NUL terminator 286 closeArchiveEntry(); 287 } else if (longFileMode != LONGFILE_TRUNCATE) { 288 throw new RuntimeException("file name '" + entryName 289 + "' is too long ( > " 290 + TarConstants.NAMELEN + " bytes)"); 291 } 292 } 293 294 if (bigNumberMode == BIGNUMBER_POSIX) { 295 addPaxHeadersForBigNumbers(paxHeaders, entry); 296 } else if (bigNumberMode != BIGNUMBER_STAR) { 297 failForBigNumbers(entry); 298 } 299 300 if (addPaxHeadersForNonAsciiNames && !paxHeaderContainsPath 301 && !ASCII.canEncode(entryName)) { 302 paxHeaders.put("path", entryName); 303 } 304 305 if (addPaxHeadersForNonAsciiNames 306 && (entry.isLink() || entry.isSymbolicLink()) 307 && !ASCII.canEncode(entry.getLinkName())) { 308 paxHeaders.put("linkpath", entry.getLinkName()); 309 } 310 311 if (paxHeaders.size() > 0) { 312 writePaxHeaders(entryName, paxHeaders); 313 } 314 315 entry.writeEntryHeader(recordBuf, encoding, 316 bigNumberMode == BIGNUMBER_STAR); 317 buffer.writeRecord(recordBuf); 318 319 currBytes = 0; 320 321 if (entry.isDirectory()) { 322 currSize = 0; 323 } else { 324 currSize = entry.getSize(); 325 } 326 currName = entryName; 327 haveUnclosedEntry = true; 328 } 329 330 /** 331 * Close an entry. This method MUST be called for all file 332 * entries that contain data. The reason is that we must 333 * buffer data written to the stream in order to satisfy 334 * the buffer's record based writes. Thus, there may be 335 * data fragments still being assembled that must be written 336 * to the output stream before this entry is closed and the 337 * next entry written. 338 * @throws IOException on error 339 */ 340 @Override 341 public void closeArchiveEntry() throws IOException { 342 if(finished) { 343 throw new IOException("Stream has already been finished"); 344 } 345 if (!haveUnclosedEntry){ 346 throw new IOException("No current entry to close"); 347 } 348 if (assemLen > 0) { 349 for (int i = assemLen; i < assemBuf.length; ++i) { 350 assemBuf[i] = 0; 351 } 352 353 buffer.writeRecord(assemBuf); 354 355 currBytes += assemLen; 356 assemLen = 0; 357 } 358 359 if (currBytes < currSize) { 360 throw new IOException("entry '" + currName + "' closed at '" 361 + currBytes 362 + "' before the '" + currSize 363 + "' bytes specified in the header were written"); 364 } 365 haveUnclosedEntry = false; 366 } 367 368 /** 369 * Writes bytes to the current tar archive entry. This method 370 * is aware of the current entry and will throw an exception if 371 * you attempt to write bytes past the length specified for the 372 * current entry. The method is also (painfully) aware of the 373 * record buffering required by TarBuffer, and manages buffers 374 * that are not a multiple of recordsize in length, including 375 * assembling records from small buffers. 376 * 377 * @param wBuf The buffer to write to the archive. 378 * @param wOffset The offset in the buffer from which to get bytes. 379 * @param numToWrite The number of bytes to write. 380 * @throws IOException on error 381 */ 382 @Override 383 public void write(byte[] wBuf, int wOffset, int numToWrite) throws IOException { 384 if ((currBytes + numToWrite) > currSize) { 385 throw new IOException("request to write '" + numToWrite 386 + "' bytes exceeds size in header of '" 387 + currSize + "' bytes for entry '" 388 + currName + "'"); 389 390 // 391 // We have to deal with assembly!!! 392 // The programmer can be writing little 32 byte chunks for all 393 // we know, and we must assemble complete records for writing. 394 // REVIEW Maybe this should be in TarBuffer? Could that help to 395 // eliminate some of the buffer copying. 396 // 397 } 398 399 if (assemLen > 0) { 400 if ((assemLen + numToWrite) >= recordBuf.length) { 401 int aLen = recordBuf.length - assemLen; 402 403 System.arraycopy(assemBuf, 0, recordBuf, 0, 404 assemLen); 405 System.arraycopy(wBuf, wOffset, recordBuf, 406 assemLen, aLen); 407 buffer.writeRecord(recordBuf); 408 409 currBytes += recordBuf.length; 410 wOffset += aLen; 411 numToWrite -= aLen; 412 assemLen = 0; 413 } else { 414 System.arraycopy(wBuf, wOffset, assemBuf, assemLen, 415 numToWrite); 416 417 wOffset += numToWrite; 418 assemLen += numToWrite; 419 numToWrite = 0; 420 } 421 } 422 423 // 424 // When we get here we have EITHER: 425 // o An empty "assemble" buffer. 426 // o No bytes to write (numToWrite == 0) 427 // 428 while (numToWrite > 0) { 429 if (numToWrite < recordBuf.length) { 430 System.arraycopy(wBuf, wOffset, assemBuf, assemLen, 431 numToWrite); 432 433 assemLen += numToWrite; 434 435 break; 436 } 437 438 buffer.writeRecord(wBuf, wOffset); 439 440 int num = recordBuf.length; 441 442 currBytes += num; 443 numToWrite -= num; 444 wOffset += num; 445 } 446 } 447 448 /** 449 * Writes a PAX extended header with the given map as contents. 450 * @since 1.4 451 */ 452 void writePaxHeaders(String entryName, 453 Map<String, String> headers) throws IOException { 454 String name = "./PaxHeaders.X/" + stripTo7Bits(entryName); 455 if (name.length() >= TarConstants.NAMELEN) { 456 name = name.substring(0, TarConstants.NAMELEN - 1); 457 } 458 TarArchiveEntry pex = new TarArchiveEntry(name, 459 TarConstants.LF_PAX_EXTENDED_HEADER_LC); 460 461 StringWriter w = new StringWriter(); 462 for (Map.Entry<String, String> h : headers.entrySet()) { 463 String key = h.getKey(); 464 String value = h.getValue(); 465 int len = key.length() + value.length() 466 + 3 /* blank, equals and newline */ 467 + 2 /* guess 9 < actual length < 100 */; 468 String line = len + " " + key + "=" + value + "\n"; 469 int actualLength = line.getBytes(CharsetNames.UTF_8).length; 470 while (len != actualLength) { 471 // Adjust for cases where length < 10 or > 100 472 // or where UTF-8 encoding isn't a single octet 473 // per character. 474 // Must be in loop as size may go from 99 to 100 in 475 // first pass so we'd need a second. 476 len = actualLength; 477 line = len + " " + key + "=" + value + "\n"; 478 actualLength = line.getBytes(CharsetNames.UTF_8).length; 479 } 480 w.write(line); 481 } 482 byte[] data = w.toString().getBytes(CharsetNames.UTF_8); 483 pex.setSize(data.length); 484 putArchiveEntry(pex); 485 write(data); 486 closeArchiveEntry(); 487 } 488 489 private String stripTo7Bits(String name) { 490 final int length = name.length(); 491 StringBuffer result = new StringBuffer(length); 492 for (int i = 0; i < length; i++) { 493 char stripped = (char) (name.charAt(i) & 0x7F); 494 if (stripped != 0) { // would be read as Trailing null 495 result.append(stripped); 496 } 497 } 498 return result.toString(); 499 } 500 501 /** 502 * Write an EOF (end of archive) record to the tar archive. 503 * An EOF record consists of a record of all zeros. 504 */ 505 private void writeEOFRecord() throws IOException { 506 for (int i = 0; i < recordBuf.length; ++i) { 507 recordBuf[i] = 0; 508 } 509 510 buffer.writeRecord(recordBuf); 511 } 512 513 @Override 514 public void flush() throws IOException { 515 out.flush(); 516 } 517 518 /** {@inheritDoc} */ 519 @Override 520 public ArchiveEntry createArchiveEntry(File inputFile, String entryName) 521 throws IOException { 522 if(finished) { 523 throw new IOException("Stream has already been finished"); 524 } 525 return new TarArchiveEntry(inputFile, entryName); 526 } 527 528 private void addPaxHeadersForBigNumbers(Map<String, String> paxHeaders, 529 TarArchiveEntry entry) { 530 addPaxHeaderForBigNumber(paxHeaders, "size", entry.getSize(), 531 TarConstants.MAXSIZE); 532 addPaxHeaderForBigNumber(paxHeaders, "gid", entry.getGroupId(), 533 TarConstants.MAXID); 534 addPaxHeaderForBigNumber(paxHeaders, "mtime", 535 entry.getModTime().getTime() / 1000, 536 TarConstants.MAXSIZE); 537 addPaxHeaderForBigNumber(paxHeaders, "uid", entry.getUserId(), 538 TarConstants.MAXID); 539 // star extensions by J\u00f6rg Schilling 540 addPaxHeaderForBigNumber(paxHeaders, "SCHILY.devmajor", 541 entry.getDevMajor(), TarConstants.MAXID); 542 addPaxHeaderForBigNumber(paxHeaders, "SCHILY.devminor", 543 entry.getDevMinor(), TarConstants.MAXID); 544 // there is no PAX header for file mode 545 failForBigNumber("mode", entry.getMode(), TarConstants.MAXID); 546 } 547 548 private void addPaxHeaderForBigNumber(Map<String, String> paxHeaders, 549 String header, long value, 550 long maxValue) { 551 if (value < 0 || value > maxValue) { 552 paxHeaders.put(header, String.valueOf(value)); 553 } 554 } 555 556 private void failForBigNumbers(TarArchiveEntry entry) { 557 failForBigNumber("entry size", entry.getSize(), TarConstants.MAXSIZE); 558 failForBigNumber("group id", entry.getGroupId(), TarConstants.MAXID); 559 failForBigNumber("last modification time", 560 entry.getModTime().getTime() / 1000, 561 TarConstants.MAXSIZE); 562 failForBigNumber("user id", entry.getUserId(), TarConstants.MAXID); 563 failForBigNumber("mode", entry.getMode(), TarConstants.MAXID); 564 failForBigNumber("major device number", entry.getDevMajor(), 565 TarConstants.MAXID); 566 failForBigNumber("minor device number", entry.getDevMinor(), 567 TarConstants.MAXID); 568 } 569 570 private void failForBigNumber(String field, long value, long maxValue) { 571 if (value < 0 || value > maxValue) { 572 throw new RuntimeException(field + " '" + value 573 + "' is too big ( > " 574 + maxValue + " )"); 575 } 576 } 577 }