001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     * http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing,
013     * software distributed under the License is distributed on an
014     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015     * KIND, either express or implied.  See the License for the
016     * specific language governing permissions and limitations
017     * under the License.
018     */
019    package org.apache.commons.compress.archivers.tar;
020    
021    import java.io.IOException;
022    import java.math.BigInteger;
023    import java.nio.ByteBuffer;
024    import org.apache.commons.compress.archivers.zip.ZipEncoding;
025    import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
026    
027    /**
028     * This class provides static utility methods to work with byte streams.
029     *
030     * @Immutable
031     */
032    // CheckStyle:HideUtilityClassConstructorCheck OFF (bc)
033    public class TarUtils {
034    
    /** Mask that turns a (signed) byte into its unsigned value 0-255 when ANDed as an int. */
    private static final int BYTE_MASK = 255;

    /**
     * Encoding used by the name-handling overloads that take no explicit
     * encoding argument. It is obtained by passing {@code null} to
     * {@code ZipEncodingHelper.getZipEncoding}; presumably that selects the
     * platform default encoding - confirm against ZipEncodingHelper.
     */
    static final ZipEncoding DEFAULT_ENCODING =
        ZipEncodingHelper.getZipEncoding(null);
039    
040        /**
041         * Encapsulates the algorithms used up to Commons Compress 1.3 as
042         * ZipEncoding.
043         */
044        static final ZipEncoding FALLBACK_ENCODING = new ZipEncoding() {
045                public boolean canEncode(String name) { return true; }
046    
047                public ByteBuffer encode(String name) {
048                    final int length = name.length();
049                    byte[] buf = new byte[length];
050    
051                    // copy until end of input or output is reached.
052                    for (int i = 0; i < length; ++i) {
053                        buf[i] = (byte) name.charAt(i);
054                    }
055                    return ByteBuffer.wrap(buf);
056                }
057    
058                public String decode(byte[] buffer) {
059                    final int length = buffer.length;
060                    StringBuffer result = new StringBuffer(length);
061    
062                    for (int i = 0; i < length; ++i) {
063                        byte b = buffer[i];
064                        if (b == 0) { // Trailing null
065                            break;
066                        }
067                        result.append((char) (b & 0xFF)); // Allow for sign-extension
068                    }
069    
070                    return result.toString();
071                }
072            };
073    
    /**
     * Private constructor to prevent instantiation of this utility class;
     * all functionality is exposed through static members.
     */
    private TarUtils(){
    }
077    
078        /**
079         * Parse an octal string from a buffer.
080         *
081         * <p>Leading spaces are ignored.
082         * The buffer must contain a trailing space or NUL,
083         * and may contain an additional trailing space or NUL.</p>
084         *
085         * <p>The input buffer is allowed to contain all NULs,
086         * in which case the method returns 0L
087         * (this allows for missing fields).</p>
088         *
089         * <p>To work-around some tar implementations that insert a
090         * leading NUL this method returns 0 if it detects a leading NUL
091         * since Commons Compress 1.4.</p>
092         *
093         * @param buffer The buffer from which to parse.
094         * @param offset The offset into the buffer from which to parse.
095         * @param length The maximum number of bytes to parse - must be at least 2 bytes.
096         * @return The long value of the octal string.
097         * @throws IllegalArgumentException if the trailing space/NUL is missing or if a invalid byte is detected.
098         */
099        public static long parseOctal(final byte[] buffer, final int offset, final int length) {
100            long    result = 0;
101            int     end = offset + length;
102            int     start = offset;
103    
104            if (length < 2){
105                throw new IllegalArgumentException("Length "+length+" must be at least 2");
106            }
107    
108            if (buffer[start] == 0) {
109                return 0L;
110            }
111    
112            // Skip leading spaces
113            while (start < end){
114                if (buffer[start] == ' '){
115                    start++;
116                } else {
117                    break;
118                }
119            }
120    
121            // Must have trailing NUL or space
122            byte trailer;
123            trailer = buffer[end-1];
124            if (trailer == 0 || trailer == ' '){
125                end--;
126            } else {
127                throw new IllegalArgumentException(
128                        exceptionMessage(buffer, offset, length, end-1, trailer));
129            }
130            // May have additional NUL or space
131            trailer = buffer[end-1];
132            if (trailer == 0 || trailer == ' '){
133                end--;
134            }
135    
136            for ( ;start < end; start++) {
137                final byte currentByte = buffer[start];
138                // CheckStyle:MagicNumber OFF
139                if (currentByte < '0' || currentByte > '7'){
140                    throw new IllegalArgumentException(
141                            exceptionMessage(buffer, offset, length, start, currentByte));
142                }
143                result = (result << 3) + (currentByte - '0'); // convert from ASCII
144                // CheckStyle:MagicNumber ON
145            }
146    
147            return result;
148        }
149    
150        /** 
151         * Compute the value contained in a byte buffer.  If the most
152         * significant bit of the first byte in the buffer is set, this
153         * bit is ignored and the rest of the buffer is interpreted as a
154         * binary number.  Otherwise, the buffer is interpreted as an
155         * octal number as per the parseOctal function above.
156         *
157         * @param buffer The buffer from which to parse.
158         * @param offset The offset into the buffer from which to parse.
159         * @param length The maximum number of bytes to parse.
160         * @return The long value of the octal or binary string.
161         * @throws IllegalArgumentException if the trailing space/NUL is
162         * missing or an invalid byte is detected in an octal number, or
163         * if a binary number would exceed the size of a signed long
164         * 64-bit integer.
165         * @since 1.4
166         */
167        public static long parseOctalOrBinary(final byte[] buffer, final int offset,
168                                              final int length) {
169    
170            if ((buffer[offset] & 0x80) == 0) {
171                return parseOctal(buffer, offset, length);
172            }
173            final boolean negative = buffer[offset] == (byte) 0xff;
174            if (length < 9) {
175                return parseBinaryLong(buffer, offset, length, negative);
176            }
177            return parseBinaryBigInteger(buffer, offset, length, negative);
178        }
179    
180        private static long parseBinaryLong(final byte[] buffer, final int offset,
181                                            final int length,
182                                            final boolean negative) {
183            if (length >= 9) {
184                throw new IllegalArgumentException("At offset " + offset + ", "
185                                                   + length + " byte binary number"
186                                                   + " exceeds maximum signed long"
187                                                   + " value");
188            }
189            long val = 0;
190            for (int i = 1; i < length; i++) {
191                val = (val << 8) + (buffer[offset + i] & 0xff);
192            }
193            if (negative) {
194                // 2's complement
195                val--;
196                val ^= ((long) Math.pow(2, (length - 1) * 8) - 1);
197            }
198            return negative ? -val : val;
199        }
200    
201        private static long parseBinaryBigInteger(final byte[] buffer,
202                                                  final int offset,
203                                                  final int length,
204                                                  final boolean negative) {
205            byte[] remainder = new byte[length - 1];
206            System.arraycopy(buffer, offset + 1, remainder, 0, length - 1);
207            BigInteger val = new BigInteger(remainder);
208            if (negative) {
209                // 2's complement
210                val = val.add(BigInteger.valueOf(-1)).not();
211            }
212            if (val.bitLength() > 63) {
213                throw new IllegalArgumentException("At offset " + offset + ", "
214                                                   + length + " byte binary number"
215                                                   + " exceeds maximum signed long"
216                                                   + " value");
217            }
218            return negative ? -val.longValue() : val.longValue();
219        }
220    
221        /**
222         * Parse a boolean byte from a buffer.
223         * Leading spaces and NUL are ignored.
224         * The buffer may contain trailing spaces or NULs.
225         *
226         * @param buffer The buffer from which to parse.
227         * @param offset The offset into the buffer from which to parse.
228         * @return The boolean value of the bytes.
229         * @throws IllegalArgumentException if an invalid byte is detected.
230         */
231        public static boolean parseBoolean(final byte[] buffer, final int offset) {
232            return buffer[offset] == 1;
233        }
234    
235        // Helper method to generate the exception message
236        private static String exceptionMessage(byte[] buffer, final int offset,
237                final int length, int current, final byte currentByte) {
238            String string = new String(buffer, offset, length); // TODO default charset?
239            string=string.replaceAll("\0", "{NUL}"); // Replace NULs to allow string to be printed
240            final String s = "Invalid byte "+currentByte+" at offset "+(current-offset)+" in '"+string+"' len="+length;
241            return s;
242        }
243    
244        /**
245         * Parse an entry name from a buffer.
246         * Parsing stops when a NUL is found
247         * or the buffer length is reached.
248         *
249         * @param buffer The buffer from which to parse.
250         * @param offset The offset into the buffer from which to parse.
251         * @param length The maximum number of bytes to parse.
252         * @return The entry name.
253         */
254        public static String parseName(byte[] buffer, final int offset, final int length) {
255            try {
256                return parseName(buffer, offset, length, DEFAULT_ENCODING);
257            } catch (IOException ex) {
258                try {
259                    return parseName(buffer, offset, length, FALLBACK_ENCODING);
260                } catch (IOException ex2) {
261                    // impossible
262                    throw new RuntimeException(ex2);
263                }
264            }
265        }
266    
267        /**
268         * Parse an entry name from a buffer.
269         * Parsing stops when a NUL is found
270         * or the buffer length is reached.
271         *
272         * @param buffer The buffer from which to parse.
273         * @param offset The offset into the buffer from which to parse.
274         * @param length The maximum number of bytes to parse.
275         * @param encoding name of the encoding to use for file names
276         * @since Commons Compress 1.4
277         * @return The entry name.
278         */
279        public static String parseName(byte[] buffer, final int offset,
280                                       final int length,
281                                       final ZipEncoding encoding)
282            throws IOException {
283    
284            int len = length;
285            for (; len > 0; len--) {
286                if (buffer[offset + len - 1] != 0) {
287                    break;
288                }
289            }
290            if (len > 0) {
291                byte[] b = new byte[len];
292                System.arraycopy(buffer, offset, b, 0, len);
293                return encoding.decode(b);
294            }
295            return "";
296        }
297    
298        /**
299         * Copy a name into a buffer.
300         * Copies characters from the name into the buffer
301         * starting at the specified offset. 
302         * If the buffer is longer than the name, the buffer
303         * is filled with trailing NULs.
304         * If the name is longer than the buffer,
305         * the output is truncated.
306         *
307         * @param name The header name from which to copy the characters.
308         * @param buf The buffer where the name is to be stored.
309         * @param offset The starting offset into the buffer
310         * @param length The maximum number of header bytes to copy.
311         * @return The updated offset, i.e. offset + length
312         */
313        public static int formatNameBytes(String name, byte[] buf, final int offset, final int length) {
314            try {
315                return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING);
316            } catch (IOException ex) {
317                try {
318                    return formatNameBytes(name, buf, offset, length,
319                                           FALLBACK_ENCODING);
320                } catch (IOException ex2) {
321                    // impossible
322                    throw new RuntimeException(ex2);
323                }
324            }
325        }
326    
327        /**
328         * Copy a name into a buffer.
329         * Copies characters from the name into the buffer
330         * starting at the specified offset. 
331         * If the buffer is longer than the name, the buffer
332         * is filled with trailing NULs.
333         * If the name is longer than the buffer,
334         * the output is truncated.
335         *
336         * @param name The header name from which to copy the characters.
337         * @param buf The buffer where the name is to be stored.
338         * @param offset The starting offset into the buffer
339         * @param length The maximum number of header bytes to copy.
340         * @param encoding name of the encoding to use for file names
341         * @since Commons Compress 1.4
342         * @return The updated offset, i.e. offset + length
343         */
344        public static int formatNameBytes(String name, byte[] buf, final int offset,
345                                          final int length,
346                                          final ZipEncoding encoding)
347            throws IOException {
348            int len = name.length();
349            ByteBuffer b = encoding.encode(name);
350            while (b.limit() > length && len > 0) {
351                b = encoding.encode(name.substring(0, --len));
352            }
353            final int limit = b.limit();
354            System.arraycopy(b.array(), b.arrayOffset(), buf, offset, limit);
355    
356            // Pad any remaining output bytes with NUL
357            for (int i = limit; i < length; ++i) {
358                buf[offset + i] = 0;
359            }
360    
361            return offset + length;
362        }
363    
364        /**
365         * Fill buffer with unsigned octal number, padded with leading zeroes.
366         * 
367         * @param value number to convert to octal - treated as unsigned
368         * @param buffer destination buffer
369         * @param offset starting offset in buffer
370         * @param length length of buffer to fill
371         * @throws IllegalArgumentException if the value will not fit in the buffer
372         */
373        public static void formatUnsignedOctalString(final long value, byte[] buffer,
374                final int offset, final int length) {
375            int remaining = length;
376            remaining--;
377            if (value == 0) {
378                buffer[offset + remaining--] = (byte) '0';
379            } else {
380                long val = value;
381                for (; remaining >= 0 && val != 0; --remaining) {
382                    // CheckStyle:MagicNumber OFF
383                    buffer[offset + remaining] = (byte) ((byte) '0' + (byte) (val & 7));
384                    val = val >>> 3;
385                    // CheckStyle:MagicNumber ON
386                }
387                if (val != 0){
388                    throw new IllegalArgumentException
389                    (value+"="+Long.toOctalString(value)+ " will not fit in octal number buffer of length "+length);
390                }
391            }
392    
393            for (; remaining >= 0; --remaining) { // leading zeros
394                buffer[offset + remaining] = (byte) '0';
395            }
396        }
397    
398        /**
399         * Write an octal integer into a buffer.
400         *
401         * Uses {@link #formatUnsignedOctalString} to format
402         * the value as an octal string with leading zeros.
403         * The converted number is followed by space and NUL
404         * 
405         * @param value The value to write
406         * @param buf The buffer to receive the output
407         * @param offset The starting offset into the buffer
408         * @param length The size of the output buffer
409         * @return The updated offset, i.e offset+length
410         * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
411         */
412        public static int formatOctalBytes(final long value, byte[] buf, final int offset, final int length) {
413    
414            int idx=length-2; // For space and trailing null
415            formatUnsignedOctalString(value, buf, offset, idx);
416    
417            buf[offset + idx++] = (byte) ' '; // Trailing space
418            buf[offset + idx]   = 0; // Trailing null
419    
420            return offset + length;
421        }
422    
423        /**
424         * Write an octal long integer into a buffer.
425         * 
426         * Uses {@link #formatUnsignedOctalString} to format
427         * the value as an octal string with leading zeros.
428         * The converted number is followed by a space.
429         * 
430         * @param value The value to write as octal
431         * @param buf The destinationbuffer.
432         * @param offset The starting offset into the buffer.
433         * @param length The length of the buffer
434         * @return The updated offset
435         * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
436         */
437        public static int formatLongOctalBytes(final long value, byte[] buf, final int offset, final int length) {
438    
439            int idx=length-1; // For space
440    
441            formatUnsignedOctalString(value, buf, offset, idx);
442            buf[offset + idx] = (byte) ' '; // Trailing space
443    
444            return offset + length;
445        }
446    
447        /**
448         * Write an long integer into a buffer as an octal string if this
449         * will fit, or as a binary number otherwise.
450         * 
451         * Uses {@link #formatUnsignedOctalString} to format
452         * the value as an octal string with leading zeros.
453         * The converted number is followed by a space.
454         * 
455         * @param value The value to write into the buffer.
456         * @param buf The destination buffer.
457         * @param offset The starting offset into the buffer.
458         * @param length The length of the buffer.
459         * @return The updated offset.
460         * @throws IllegalArgumentException if the value (and trailer)
461         * will not fit in the buffer.
462         * @since 1.4
463         */
464        public static int formatLongOctalOrBinaryBytes(
465            final long value, byte[] buf, final int offset, final int length) {
466    
467            // Check whether we are dealing with UID/GID or SIZE field
468            final long maxAsOctalChar = length == TarConstants.UIDLEN ? TarConstants.MAXID : TarConstants.MAXSIZE;
469    
470            final boolean negative = value < 0;
471            if (!negative && value <= maxAsOctalChar) { // OK to store as octal chars
472                return formatLongOctalBytes(value, buf, offset, length);
473            }
474    
475            if (length < 9) {
476                formatLongBinary(value, buf, offset, length, negative);
477            }
478            formatBigIntegerBinary(value, buf, offset, length, negative);
479    
480            buf[offset] = (byte) (negative ? 0xff : 0x80);
481            return offset + length;
482        }
483    
484        private static void formatLongBinary(final long value, byte[] buf,
485                                             final int offset, final int length,
486                                             final boolean negative) {
487            final int bits = (length - 1) * 8;
488            final long max = 1l << bits;
489            long val = Math.abs(value);
490            if (val >= max) {
491                throw new IllegalArgumentException("Value " + value +
492                    " is too large for " + length + " byte field.");
493            }
494            if (negative) {
495                val ^= max - 1;
496                val |= 0xff << bits;
497                val++;
498            }
499            for (int i = offset + length - 1; i >= offset; i--) {
500                buf[i] = (byte) val;
501                val >>= 8;
502            }
503        }
504    
505        private static void formatBigIntegerBinary(final long value, byte[] buf,
506                                                   final int offset,
507                                                   final int length,
508                                                   final boolean negative) {
509            BigInteger val = BigInteger.valueOf(value);
510            final byte[] b = val.toByteArray();
511            final int len = b.length;
512            final int off = offset + length - len;
513            System.arraycopy(b, 0, buf, off, len);
514            final byte fill = (byte) (negative ? 0xff : 0);
515            for (int i = offset + 1; i < off; i++) {
516                buf[i] = fill;
517            }
518        }
519    
520        /**
521         * Writes an octal value into a buffer.
522         * 
523         * Uses {@link #formatUnsignedOctalString} to format
524         * the value as an octal string with leading zeros.
525         * The converted number is followed by NUL and then space.
526         *
527         * @param value The value to convert
528         * @param buf The destination buffer
529         * @param offset The starting offset into the buffer.
530         * @param length The size of the buffer.
531         * @return The updated value of offset, i.e. offset+length
532         * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
533         */
534        public static int formatCheckSumOctalBytes(final long value, byte[] buf, final int offset, final int length) {
535    
536            int idx=length-2; // for NUL and space
537            formatUnsignedOctalString(value, buf, offset, idx);
538    
539            buf[offset + idx++]   = 0; // Trailing null
540            buf[offset + idx]     = (byte) ' '; // Trailing space
541    
542            return offset + length;
543        }
544    
545        /**
546         * Compute the checksum of a tar entry header.
547         *
548         * @param buf The tar entry's header buffer.
549         * @return The computed checksum.
550         */
551        public static long computeCheckSum(final byte[] buf) {
552            long sum = 0;
553    
554            for (int i = 0; i < buf.length; ++i) {
555                sum += BYTE_MASK & buf[i];
556            }
557    
558            return sum;
559        }
560    
561    }