/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

/*
 * This package is based on the work done by Timothy Gerard Endres
 * (time@ice.com) to whom the Ant project is very grateful for his great code.
 */

package org.apache.commons.compress.archivers.tar;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.archivers.zip.ZipEncoding;
import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
import org.apache.commons.compress.utils.ArchiveUtils;
import org.apache.commons.compress.utils.CharsetNames;

/**
 * The TarArchiveInputStream reads a UNIX tar archive as an InputStream.
 * Methods are provided to position the stream at each successive entry
 * in the archive, and then read each entry as a normal input stream
 * using read().
 * @NotThreadSafe
 */
public class TarArchiveInputStream extends ArchiveInputStream {
    private static final int SMALL_BUFFER_SIZE = 256;
    private static final int BUFFER_SIZE = 8 * 1024;

    /** Whether the end of the archive has been reached. */
    private boolean hasHitEOF;

    /** Size of the current entry's data as reported by its header. */
    private long entrySize;

    /** Number of bytes of the current entry's data already read. */
    private long entryOffset;

    /** Remainder of the last record read but not yet consumed. */
    private byte[] readBuf;

    protected final TarBuffer buffer;

    /** The entry the stream is currently positioned at. */
    private TarArchiveEntry currEntry;

    /** The encoding used for file names. */
    private final ZipEncoding encoding;

    /**
     * Constructor for TarArchiveInputStream.
     * @param is the input stream to use
     */
    public TarArchiveInputStream(InputStream is) {
        this(is, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE);
    }

    /**
     * Constructor for TarArchiveInputStream.
     * @param is the input stream to use
     * @param encoding name of the encoding to use for file names
     * @since Commons Compress 1.4
     */
    public TarArchiveInputStream(InputStream is, String encoding) {
        this(is, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE, encoding);
    }

    /**
     * Constructor for TarArchiveInputStream.
     * @param is the input stream to use
     * @param blockSize the block size to use
     */
    public TarArchiveInputStream(InputStream is, int blockSize) {
        this(is, blockSize, TarBuffer.DEFAULT_RCDSIZE);
    }
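    /*
     * A minimal usage sketch (illustrative only; "archive.tar.gz" is a
     * hypothetical file name): the constructors accept any InputStream,
     * so a compressed archive can be read by stacking streams.
     *
     *   TarArchiveInputStream in = new TarArchiveInputStream(
     *       new GZIPInputStream(new FileInputStream("archive.tar.gz")));
     *   TarArchiveEntry entry;
     *   while ((entry = in.getNextTarEntry()) != null) {
     *       System.out.println(entry.getName());
     *   }
     *   in.close();
     */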
    /**
     * Constructor for TarArchiveInputStream.
     * @param is the input stream to use
     * @param blockSize the block size to use
     * @param encoding name of the encoding to use for file names
     * @since Commons Compress 1.4
     */
    public TarArchiveInputStream(InputStream is, int blockSize,
                                 String encoding) {
        this(is, blockSize, TarBuffer.DEFAULT_RCDSIZE, encoding);
    }

    /**
     * Constructor for TarArchiveInputStream.
     * @param is the input stream to use
     * @param blockSize the block size to use
     * @param recordSize the record size to use
     */
    public TarArchiveInputStream(InputStream is, int blockSize, int recordSize) {
        this(is, blockSize, recordSize, null);
    }

    /**
     * Constructor for TarArchiveInputStream.
     * @param is the input stream to use
     * @param blockSize the block size to use
     * @param recordSize the record size to use
     * @param encoding name of the encoding to use for file names
     * @since Commons Compress 1.4
     */
    public TarArchiveInputStream(InputStream is, int blockSize, int recordSize,
                                 String encoding) {
        this.buffer = new TarBuffer(is, blockSize, recordSize);
        this.readBuf = null;
        this.hasHitEOF = false;
        this.encoding = ZipEncodingHelper.getZipEncoding(encoding);
    }

    /**
     * Closes this stream. Calls the TarBuffer's close() method.
     * @throws IOException on error
     */
    @Override
    public void close() throws IOException {
        buffer.close();
    }

    /**
     * Get the record size being used by this stream's TarBuffer.
     *
     * @return The TarBuffer record size.
     */
    public int getRecordSize() {
        return buffer.getRecordSize();
    }

    /**
     * Get the available data that can be read from the current
     * entry in the archive. This does not indicate how much data
     * is left in the entire archive, only in the current entry.
     * This value is determined from the entry's size header field
     * and the amount of data already read from the current entry.
     * Integer.MAX_VALUE is returned in case more than Integer.MAX_VALUE
     * bytes are left in the current entry in the archive.
     *
     * @return The number of available bytes for the current entry.
     * @throws IOException never thrown; declared to match the
     * InputStream signature
     */
    @Override
    public int available() throws IOException {
        if (entrySize - entryOffset > Integer.MAX_VALUE) {
            return Integer.MAX_VALUE;
        }
        return (int) (entrySize - entryOffset);
    }

    /**
     * Skip bytes in the input buffer. This skips bytes in the
     * current entry's data, not the entire archive, and will
     * stop at the end of the current entry's data if the number
     * to skip extends beyond that point.
     *
     * @param numToSkip The number of bytes to skip.
     * @return the number actually skipped
     * @throws IOException on error
     */
    @Override
    public long skip(long numToSkip) throws IOException {
        // REVIEW
        // This is horribly inefficient, but it ensures that we
        // properly skip over bytes via the TarBuffer...
        //
        byte[] skipBuf = new byte[BUFFER_SIZE];
        long skip = numToSkip;
        while (skip > 0) {
            int realSkip = (int) (skip > skipBuf.length ? skipBuf.length : skip);
            int numRead = read(skipBuf, 0, realSkip);
            if (numRead == -1) {
                break;
            }
            skip -= numRead;
        }
        return numToSkip - skip;
    }

    /**
     * Since we do not support marking just yet, we do nothing.
     */
    @Override
    public synchronized void reset() {
    }
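    /*
     * Note on skip() and available() above, with a minimal sketch
     * (illustrative only; "in" is a hypothetical TarArchiveInputStream):
     * both methods are scoped to the current entry, not the whole archive,
     * so skipping Long.MAX_VALUE bytes merely exhausts the data of the
     * entry currently being read.
     *
     *   // positions the stream just past the current entry's data
     *   long skipped = in.skip(Long.MAX_VALUE);
     */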
    /**
     * Get the next entry in this tar archive. This will skip
     * over any remaining data in the current entry, if there
     * is one, position the input stream at the header of the
     * next entry, read the header, and instantiate a new
     * TarArchiveEntry from the header bytes, returning that entry.
     * If there are no more entries in the archive, null will
     * be returned to indicate that the end of the archive has
     * been reached.
     *
     * @return The next TarArchiveEntry in the archive, or null.
     * @throws IOException on error
     */
    public TarArchiveEntry getNextTarEntry() throws IOException {
        if (hasHitEOF) {
            return null;
        }

        if (currEntry != null) {
            // Skip any data of the current entry that has not been read yet.
            long numToSkip = entrySize - entryOffset;

            while (numToSkip > 0) {
                long skipped = skip(numToSkip);
                if (skipped <= 0) {
                    throw new IOException("failed to skip current tar entry");
                }
                numToSkip -= skipped;
            }

            readBuf = null;
        }

        byte[] headerBuf = getRecord();

        if (hasHitEOF) {
            currEntry = null;
            return null;
        }

        try {
            currEntry = new TarArchiveEntry(headerBuf, encoding);
        } catch (IllegalArgumentException e) {
            IOException ioe = new IOException("Error detected parsing the header");
            ioe.initCause(e);
            throw ioe;
        }
        entryOffset = 0;
        entrySize = currEntry.getSize();

        if (currEntry.isGNULongNameEntry()) {
            // The entry's data holds the real name; read it in full.
            StringBuilder longName = new StringBuilder();
            byte[] buf = new byte[SMALL_BUFFER_SIZE];
            int length = 0;
            while ((length = read(buf)) >= 0) {
                longName.append(new String(buf, 0, length)); // TODO default charset?
            }
            getNextEntry();
            if (currEntry == null) {
                // Bugzilla: 40334
                // Malformed tar file - long entry name not followed by entry
                return null;
            }
            // remove trailing null terminator
            if (longName.length() > 0
                && longName.charAt(longName.length() - 1) == 0) {
                longName.deleteCharAt(longName.length() - 1);
            }
            currEntry.setName(longName.toString());
        }

        if (currEntry.isPaxHeader()) { // Process PAX headers
            paxHeaders();
        }

        if (currEntry.isGNUSparse()) { // Process sparse files
            readGNUSparse();
        }

        // If the size of the next element in the archive has changed
        // due to a new size being reported in the POSIX header
        // information, we update entrySize here so that it contains
        // the correct value.
        entrySize = currEntry.getSize();
        return currEntry;
    }

    /**
     * Get the next record in this tar archive. This assumes the
     * stream is positioned at a record boundary and places it at
     * the start of the following record.
     * If there are no more records in the archive, either because
     * the underlying stream is exhausted or because an EOF record
     * has been encountered, null will be returned to indicate that
     * the end of the archive has been reached.
     *
     * @return The next header in the archive, or null.
     * @throws IOException on error
     */
    private byte[] getRecord() throws IOException {
        if (hasHitEOF) {
            return null;
        }

        byte[] headerBuf = buffer.readRecord();

        if (headerBuf == null) {
            // Physical end of the underlying stream.
            hasHitEOF = true;
        } else if (buffer.isEOFRecord(headerBuf)) {
            // An all-zero record marks the logical end of the archive.
            hasHitEOF = true;
        }

        return hasHitEOF ? null : headerBuf;
    }
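    /*
     * The PAX extended header parsed below is a sequence of records of the
     * form "length keyword=value\n", where length counts every byte of the
     * record including the length digits themselves, the space, the equals
     * sign and the trailing newline. A worked example (30 bytes in total):
     *
     *   30 mtime=1321711775.972059463\n
     */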
    /**
     * Reads the PAX extended header data of the current entry and
     * applies it to the entry that follows.
     */
    private void paxHeaders() throws IOException {
        Map<String, String> headers = parsePaxHeaders(this);
        getNextEntry(); // Get the actual file entry
        applyPaxHeadersToCurrentEntry(headers);
    }

    Map<String, String> parsePaxHeaders(InputStream i) throws IOException {
        Map<String, String> headers = new HashMap<String, String>();
        // Format is "length keyword=value\n"
        while (true) { // get length
            int ch;
            int len = 0;
            int read = 0;
            while ((ch = i.read()) != -1) {
                read++;
                if (ch == ' ') { // End of length string
                    // Get keyword
                    ByteArrayOutputStream coll = new ByteArrayOutputStream();
                    while ((ch = i.read()) != -1) {
                        read++;
                        if (ch == '=') { // end of keyword
                            String keyword = coll.toString(CharsetNames.UTF_8);
                            // Get rest of entry
                            byte[] rest = new byte[len - read];
                            int got = i.read(rest);
                            if (got != len - read) {
                                throw new IOException("Failed to read "
                                                      + "PAX header. Expected "
                                                      + (len - read)
                                                      + " bytes, read "
                                                      + got);
                            }
                            // Drop trailing NL
                            String value = new String(rest, 0,
                                                      len - read - 1,
                                                      CharsetNames.UTF_8);
                            headers.put(keyword, value);
                            break;
                        }
                        coll.write((byte) ch);
                    }
                    break; // Processed single header
                }
                // Accumulate the decimal length field digit by digit.
                len *= 10;
                len += ch - '0';
            }
            if (ch == -1) { // EOF
                break;
            }
        }
        return headers;
    }

    private void applyPaxHeadersToCurrentEntry(Map<String, String> headers) {
        /*
         * The following headers are defined for PAX.
         * atime, ctime, charset: cannot use these without changing TarArchiveEntry fields
         * mtime
         * comment
         * gid, gname
         * linkpath
         * size
         * uid, uname
         * SCHILY.devminor, SCHILY.devmajor: don't have setters/getters for those
         */
        for (Entry<String, String> ent : headers.entrySet()) {
            String key = ent.getKey();
            String val = ent.getValue();
            if ("path".equals(key)) {
                currEntry.setName(val);
            } else if ("linkpath".equals(key)) {
                currEntry.setLinkName(val);
            } else if ("gid".equals(key)) {
                currEntry.setGroupId(Integer.parseInt(val));
            } else if ("gname".equals(key)) {
                currEntry.setGroupName(val);
            } else if ("uid".equals(key)) {
                currEntry.setUserId(Integer.parseInt(val));
            } else if ("uname".equals(key)) {
                currEntry.setUserName(val);
            } else if ("size".equals(key)) {
                currEntry.setSize(Long.parseLong(val));
            } else if ("mtime".equals(key)) {
                // mtime is given in (possibly fractional) seconds since the
                // epoch; TarArchiveEntry stores milliseconds.
                currEntry.setModTime((long) (Double.parseDouble(val) * 1000));
            } else if ("SCHILY.devminor".equals(key)) {
                currEntry.setDevMinor(Integer.parseInt(val));
            } else if ("SCHILY.devmajor".equals(key)) {
                currEntry.setDevMajor(Integer.parseInt(val));
            }
        }
    }
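    /*
     * Worked example for the mtime conversion above (illustrative only):
     * a PAX record of "mtime=1321711775.25" denotes 1321711775.25 seconds
     * since the epoch, which setModTime receives as
     * (long) (1321711775.25 * 1000) = 1321711775250 milliseconds.
     */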
    /**
     * Adds the sparse chunks from the current entry to the sparse chunks,
     * including any additional sparse entries following the current entry.
     *
     * @throws IOException on error
     *
     * @todo Sparse files are not really processed yet.
     */
    private void readGNUSparse() throws IOException {
        /* we do not really process sparse files yet
        sparses = new ArrayList();
        sparses.addAll(currEntry.getSparses());
        */
        if (currEntry.isExtended()) {
            TarArchiveSparseEntry entry;
            do {
                byte[] headerBuf = getRecord();
                if (hasHitEOF) {
                    currEntry = null;
                    break;
                }
                entry = new TarArchiveSparseEntry(headerBuf);
                /* we do not really process sparse files yet
                sparses.addAll(entry.getSparses());
                */
            } while (entry.isExtended());
        }
    }

    @Override
    public ArchiveEntry getNextEntry() throws IOException {
        return getNextTarEntry();
    }

    /**
     * Reads bytes from the current tar archive entry.
     *
     * This method is aware of the boundaries of the current
     * entry in the archive and will deal with them as if they
     * were this stream's start and EOF.
     *
     * @param buf The buffer into which to place bytes read.
     * @param offset The offset at which to place bytes read.
     * @param numToRead The number of bytes to read.
     * @return The number of bytes read, or -1 at EOF.
     * @throws IOException on error
     */
    @Override
    public int read(byte[] buf, int offset, int numToRead) throws IOException {
        int totalRead = 0;

        if (entryOffset >= entrySize) {
            return -1;
        }

        if ((numToRead + entryOffset) > entrySize) {
            numToRead = (int) (entrySize - entryOffset);
        }

        if (readBuf != null) {
            // Serve as much as possible from the remainder of the last
            // record read.
            int sz = (numToRead > readBuf.length) ? readBuf.length
                : numToRead;

            System.arraycopy(readBuf, 0, buf, offset, sz);

            if (sz >= readBuf.length) {
                readBuf = null;
            } else {
                int newLen = readBuf.length - sz;
                byte[] newBuf = new byte[newLen];

                System.arraycopy(readBuf, sz, newBuf, 0, newLen);

                readBuf = newBuf;
            }

            totalRead += sz;
            numToRead -= sz;
            offset += sz;
        }

        while (numToRead > 0) {
            byte[] rec = buffer.readRecord();

            if (rec == null) {
                // Unexpected EOF!
                throw new IOException("unexpected EOF with " + numToRead
                                      + " bytes unread. Occurred at byte: "
                                      + getBytesRead());
            }
            count(rec.length);
            int sz = numToRead;
            int recLen = rec.length;

            if (recLen > sz) {
                System.arraycopy(rec, 0, buf, offset, sz);

                // Stash the unread tail of the record for the next read.
                readBuf = new byte[recLen - sz];

                System.arraycopy(rec, sz, readBuf, 0, recLen - sz);
            } else {
                sz = recLen;

                System.arraycopy(rec, 0, buf, offset, recLen);
            }

            totalRead += sz;
            numToRead -= sz;
            offset += sz;
        }

        entryOffset += totalRead;

        return totalRead;
    }

    /**
     * Whether this class is able to read the given entry.
     *
     * <p>May return false if the current entry is a sparse file.</p>
     *
     * @param ae the entry to test
     * @return true if the entry's data can be read, false otherwise
     */
    @Override
    public boolean canReadEntryData(ArchiveEntry ae) {
        if (ae instanceof TarArchiveEntry) {
            TarArchiveEntry te = (TarArchiveEntry) ae;
            return !te.isGNUSparse();
        }
        return false;
    }

    protected final TarArchiveEntry getCurrentEntry() {
        return currEntry;
    }

    protected final void setCurrentEntry(TarArchiveEntry e) {
        currEntry = e;
    }

    protected final boolean isAtEOF() {
        return hasHitEOF;
    }

    protected final void setAtEOF(boolean b) {
        hasHitEOF = b;
    }
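    /*
     * A minimal detection sketch for matches() below (illustrative only;
     * "in" is a hypothetical mark-supporting InputStream positioned at the
     * start of the candidate data; a tar header record is 512 bytes):
     *
     *   byte[] header = new byte[512];
     *   in.mark(header.length);
     *   int n = in.read(header);
     *   in.reset();
     *   boolean isTar = TarArchiveInputStream.matches(header, n);
     */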
    /**
     * Checks if the signature matches what is expected for a tar file.
     *
     * @param signature
     *            the bytes to check
     * @param length
     *            the number of bytes to check
     * @return true, if this stream is a tar archive stream, false otherwise
     */
    public static boolean matches(byte[] signature, int length) {
        if (length < TarConstants.VERSION_OFFSET + TarConstants.VERSIONLEN) {
            return false;
        }

        if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_POSIX,
                signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
            &&
            ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_POSIX,
                signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
            ) {
            return true;
        }
        if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_GNU,
                signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
            &&
            (
             ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_SPACE,
                signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
             ||
             ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_ZERO,
                signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
            )
            ) {
            return true;
        }
        // COMPRESS-107 - recognise Ant tar files
        if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_ANT,
                signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
            &&
            ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_ANT,
                signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
            ) {
            return true;
        }
        return false;
    }

}