001    /*
002     *  Licensed to the Apache Software Foundation (ASF) under one or more
003     *  contributor license agreements.  See the NOTICE file distributed with
004     *  this work for additional information regarding copyright ownership.
005     *  The ASF licenses this file to You under the Apache License, Version 2.0
006     *  (the "License"); you may not use this file except in compliance with
007     *  the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     *  Unless required by applicable law or agreed to in writing, software
012     *  distributed under the License is distributed on an "AS IS" BASIS,
013     *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     *  See the License for the specific language governing permissions and
015     *  limitations under the License.
016     *
017     */
018    
019    package org.apache.commons.compress.archivers.zip;
020    
021    import java.nio.ByteBuffer;
022    import java.nio.charset.Charset;
023    import java.nio.charset.UnsupportedCharsetException;
024    import java.util.HashMap;
025    import java.util.Map;
026    
027    import org.apache.commons.compress.utils.CharsetNames;
028    
029    /**
030     * Static helper functions for robustly encoding filenames in zip files. 
031     */
032    public abstract class ZipEncodingHelper {
033    
034        /**
035         * A class, which holds the high characters of a simple encoding
036         * and lazily instantiates a Simple8BitZipEncoding instance in a
037         * thread-safe manner.
038         */
039        private static class SimpleEncodingHolder {
040    
041            private final char [] highChars;
042            private Simple8BitZipEncoding encoding;
043    
044            /**
045             * Instantiate a simple encoding holder.
046             * 
047             * @param highChars The characters for byte codes 128 to 255.
048             * 
049             * @see Simple8BitZipEncoding#Simple8BitZipEncoding(char[])
050             */
051            SimpleEncodingHolder(char [] highChars) {
052                this.highChars = highChars;
053            }
054    
055            /**
056             * @return The associated {@link Simple8BitZipEncoding}, which
057             *         is instantiated if not done so far.
058             */
059            public synchronized Simple8BitZipEncoding getEncoding() {
060                if (this.encoding == null) {
061                    this.encoding = new Simple8BitZipEncoding(this.highChars);
062                }
063                return this.encoding;
064            }
065        }
066    
067        private static final Map<String, SimpleEncodingHolder> simpleEncodings;
068    
069        static {
070            simpleEncodings = new HashMap<String, SimpleEncodingHolder>();
071    
072            char[] cp437_high_chars =
073                new char[] { 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0,
074                             0x00e5, 0x00e7, 0x00ea, 0x00eb, 0x00e8, 0x00ef,
075                             0x00ee, 0x00ec, 0x00c4, 0x00c5, 0x00c9, 0x00e6,
076                             0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9,
077                             0x00ff, 0x00d6, 0x00dc, 0x00a2, 0x00a3, 0x00a5,
078                             0x20a7, 0x0192, 0x00e1, 0x00ed, 0x00f3, 0x00fa,
079                             0x00f1, 0x00d1, 0x00aa, 0x00ba, 0x00bf, 0x2310,
080                             0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb,
081                             0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561,
082                             0x2562, 0x2556, 0x2555, 0x2563, 0x2551, 0x2557,
083                             0x255d, 0x255c, 0x255b, 0x2510, 0x2514, 0x2534,
084                             0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f,
085                             0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550,
086                             0x256c, 0x2567, 0x2568, 0x2564, 0x2565, 0x2559,
087                             0x2558, 0x2552, 0x2553, 0x256b, 0x256a, 0x2518,
088                             0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580,
089                             0x03b1, 0x00df, 0x0393, 0x03c0, 0x03a3, 0x03c3,
090                             0x00b5, 0x03c4, 0x03a6, 0x0398, 0x03a9, 0x03b4,
091                             0x221e, 0x03c6, 0x03b5, 0x2229, 0x2261, 0x00b1,
092                             0x2265, 0x2264, 0x2320, 0x2321, 0x00f7, 0x2248,
093                             0x00b0, 0x2219, 0x00b7, 0x221a, 0x207f, 0x00b2,
094                             0x25a0, 0x00a0 };
095    
096            SimpleEncodingHolder cp437 = new SimpleEncodingHolder(cp437_high_chars);
097    
098            simpleEncodings.put("CP437",cp437);
099            simpleEncodings.put("Cp437",cp437);
100            simpleEncodings.put("cp437",cp437);
101            simpleEncodings.put("IBM437",cp437);
102            simpleEncodings.put("ibm437",cp437);
103    
104            char[] cp850_high_chars =
105                new char[] { 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0,
106                             0x00e5, 0x00e7, 0x00ea, 0x00eb, 0x00e8, 0x00ef,
107                             0x00ee, 0x00ec, 0x00c4, 0x00c5, 0x00c9, 0x00e6,
108                             0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9,
109                             0x00ff, 0x00d6, 0x00dc, 0x00f8, 0x00a3, 0x00d8,
110                             0x00d7, 0x0192, 0x00e1, 0x00ed, 0x00f3, 0x00fa,
111                             0x00f1, 0x00d1, 0x00aa, 0x00ba, 0x00bf, 0x00ae,
112                             0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb,
113                             0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00c1,
114                             0x00c2, 0x00c0, 0x00a9, 0x2563, 0x2551, 0x2557,
115                             0x255d, 0x00a2, 0x00a5, 0x2510, 0x2514, 0x2534,
116                             0x252c, 0x251c, 0x2500, 0x253c, 0x00e3, 0x00c3,
117                             0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550,
118                             0x256c, 0x00a4, 0x00f0, 0x00d0, 0x00ca, 0x00cb,
119                             0x00c8, 0x0131, 0x00cd, 0x00ce, 0x00cf, 0x2518,
120                             0x250c, 0x2588, 0x2584, 0x00a6, 0x00cc, 0x2580,
121                             0x00d3, 0x00df, 0x00d4, 0x00d2, 0x00f5, 0x00d5,
122                             0x00b5, 0x00fe, 0x00de, 0x00da, 0x00db, 0x00d9,
123                             0x00fd, 0x00dd, 0x00af, 0x00b4, 0x00ad, 0x00b1,
124                             0x2017, 0x00be, 0x00b6, 0x00a7, 0x00f7, 0x00b8,
125                             0x00b0, 0x00a8, 0x00b7, 0x00b9, 0x00b3, 0x00b2,
126                             0x25a0, 0x00a0 };
127    
128            SimpleEncodingHolder cp850 = new SimpleEncodingHolder(cp850_high_chars);
129    
130            simpleEncodings.put("CP850",cp850);
131            simpleEncodings.put("Cp850",cp850);
132            simpleEncodings.put("cp850",cp850);
133            simpleEncodings.put("IBM850",cp850);
134            simpleEncodings.put("ibm850",cp850);
135        }
136    
137        /**
138         * Grow a byte buffer, so it has a minimal capacity or at least
139         * the double capacity of the original buffer 
140         * 
141         * @param b The original buffer.
142         * @param newCapacity The minimal requested new capacity.
143         * @return A byte buffer <code>r</code> with
144         *         <code>r.capacity() = max(b.capacity()*2,newCapacity)</code> and
145         *         all the data contained in <code>b</code> copied to the beginning
146         *         of <code>r</code>.
147         *
148         */
149        static ByteBuffer growBuffer(ByteBuffer b, int newCapacity) {
150            b.limit(b.position());
151            b.rewind();
152    
153            int c2 = b.capacity() * 2;
154            ByteBuffer on = ByteBuffer.allocate(c2 < newCapacity ? newCapacity : c2);
155    
156            on.put(b);
157            return on;
158        }
159    
160     
161        /**
162         * The hexadecimal digits <code>0,...,9,A,...,F</code> encoded as
163         * ASCII bytes.
164         */
165        private static final byte[] HEX_DIGITS =
166            new byte [] {
167            0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x41,
168            0x42, 0x43, 0x44, 0x45, 0x46
169        };
170    
171        /**
172         * Append <code>%Uxxxx</code> to the given byte buffer.
173         * The caller must assure, that <code>bb.remaining()&gt;=6</code>.
174         * 
175         * @param bb The byte buffer to write to.
176         * @param c The character to write.
177         */
178        static void appendSurrogate(ByteBuffer bb, char c) {
179    
180            bb.put((byte) '%');
181            bb.put((byte) 'U');
182    
183            bb.put(HEX_DIGITS[(c >> 12)&0x0f]);
184            bb.put(HEX_DIGITS[(c >> 8)&0x0f]);
185            bb.put(HEX_DIGITS[(c >> 4)&0x0f]);
186            bb.put(HEX_DIGITS[c & 0x0f]);
187        }
188    
189    
190        /**
191         * name of the encoding UTF-8
192         */
193        static final String UTF8 = "UTF8";
194    
195        /**
196         * variant name of the encoding UTF-8 used for comparisions.
197         */
198        private static final String UTF_DASH_8 = CharsetNames.UTF_8;
199    
200        /**
201         * name of the encoding UTF-8
202         */
203        static final ZipEncoding UTF8_ZIP_ENCODING = new FallbackZipEncoding(UTF8);
204    
205        /**
206         * Instantiates a zip encoding.
207         * 
208         * @param name The name of the zip encoding. Specify {@code null} for
209         *             the platform's default encoding.
210         * @return A zip encoding for the given encoding name.
211         */
212        public static ZipEncoding getZipEncoding(String name) {
213     
214            // fallback encoding is good enough for utf-8.
215            if (isUTF8(name)) {
216                return UTF8_ZIP_ENCODING;
217            }
218    
219            if (name == null) {
220                return new FallbackZipEncoding();
221            }
222    
223            SimpleEncodingHolder h = simpleEncodings.get(name);
224    
225            if (h!=null) {
226                return h.getEncoding();
227            }
228    
229            try {
230    
231                Charset cs = Charset.forName(name);
232                return new NioZipEncoding(cs);
233    
234            } catch (UnsupportedCharsetException e) {
235                return new FallbackZipEncoding(name);
236            }
237        }
238    
239        /**
240         * Whether a given encoding - or the platform's default encoding
241         * if the parameter is null - is UTF-8.
242         */
243        static boolean isUTF8(String encoding) {
244            if (encoding == null) {
245                // check platform's default encoding
246                encoding = System.getProperty("file.encoding");
247            }
248            return UTF8.equalsIgnoreCase(encoding)
249                || UTF_DASH_8.equalsIgnoreCase(encoding);
250        }
251    }