001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 */ 018 019 package org.apache.commons.compress.archivers.zip; 020 021 import java.nio.ByteBuffer; 022 import java.nio.charset.Charset; 023 import java.nio.charset.UnsupportedCharsetException; 024 import java.util.HashMap; 025 import java.util.Map; 026 027 import org.apache.commons.compress.utils.CharsetNames; 028 029 /** 030 * Static helper functions for robustly encoding filenames in zip files. 031 */ 032 public abstract class ZipEncodingHelper { 033 034 /** 035 * A class, which holds the high characters of a simple encoding 036 * and lazily instantiates a Simple8BitZipEncoding instance in a 037 * thread-safe manner. 038 */ 039 private static class SimpleEncodingHolder { 040 041 private final char [] highChars; 042 private Simple8BitZipEncoding encoding; 043 044 /** 045 * Instantiate a simple encoding holder. 046 * 047 * @param highChars The characters for byte codes 128 to 255. 048 * 049 * @see Simple8BitZipEncoding#Simple8BitZipEncoding(char[]) 050 */ 051 SimpleEncodingHolder(char [] highChars) { 052 this.highChars = highChars; 053 } 054 055 /** 056 * @return The associated {@link Simple8BitZipEncoding}, which 057 * is instantiated if not done so far. 058 */ 059 public synchronized Simple8BitZipEncoding getEncoding() { 060 if (this.encoding == null) { 061 this.encoding = new Simple8BitZipEncoding(this.highChars); 062 } 063 return this.encoding; 064 } 065 } 066 067 private static final Map<String, SimpleEncodingHolder> simpleEncodings; 068 069 static { 070 simpleEncodings = new HashMap<String, SimpleEncodingHolder>(); 071 072 char[] cp437_high_chars = 073 new char[] { 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 074 0x00e5, 0x00e7, 0x00ea, 0x00eb, 0x00e8, 0x00ef, 075 0x00ee, 0x00ec, 0x00c4, 0x00c5, 0x00c9, 0x00e6, 076 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9, 077 0x00ff, 0x00d6, 0x00dc, 0x00a2, 0x00a3, 0x00a5, 078 0x20a7, 0x0192, 0x00e1, 0x00ed, 0x00f3, 0x00fa, 079 0x00f1, 0x00d1, 0x00aa, 0x00ba, 0x00bf, 0x2310, 080 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb, 081 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 082 0x2562, 0x2556, 0x2555, 0x2563, 0x2551, 0x2557, 083 0x255d, 0x255c, 0x255b, 0x2510, 0x2514, 0x2534, 084 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f, 085 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 086 0x256c, 0x2567, 0x2568, 0x2564, 0x2565, 0x2559, 087 0x2558, 0x2552, 0x2553, 0x256b, 0x256a, 0x2518, 088 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580, 089 0x03b1, 0x00df, 0x0393, 0x03c0, 0x03a3, 0x03c3, 090 0x00b5, 0x03c4, 0x03a6, 0x0398, 0x03a9, 0x03b4, 091 0x221e, 0x03c6, 0x03b5, 0x2229, 0x2261, 0x00b1, 092 0x2265, 0x2264, 0x2320, 0x2321, 0x00f7, 0x2248, 093 0x00b0, 0x2219, 0x00b7, 0x221a, 0x207f, 0x00b2, 094 0x25a0, 0x00a0 }; 095 096 SimpleEncodingHolder cp437 = new SimpleEncodingHolder(cp437_high_chars); 097 098 simpleEncodings.put("CP437",cp437); 099 simpleEncodings.put("Cp437",cp437); 100 simpleEncodings.put("cp437",cp437); 101 simpleEncodings.put("IBM437",cp437); 102 simpleEncodings.put("ibm437",cp437); 103 104 char[] cp850_high_chars = 105 new char[] { 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 106 0x00e5, 0x00e7, 0x00ea, 0x00eb, 0x00e8, 0x00ef, 107 0x00ee, 0x00ec, 0x00c4, 0x00c5, 0x00c9, 0x00e6, 108 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9, 109 0x00ff, 0x00d6, 0x00dc, 0x00f8, 0x00a3, 0x00d8, 110 0x00d7, 0x0192, 0x00e1, 0x00ed, 0x00f3, 0x00fa, 111 0x00f1, 0x00d1, 0x00aa, 0x00ba, 0x00bf, 0x00ae, 112 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb, 113 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00c1, 114 0x00c2, 0x00c0, 0x00a9, 0x2563, 0x2551, 0x2557, 115 0x255d, 0x00a2, 0x00a5, 0x2510, 0x2514, 0x2534, 116 0x252c, 0x251c, 0x2500, 0x253c, 0x00e3, 0x00c3, 117 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 118 0x256c, 0x00a4, 0x00f0, 0x00d0, 0x00ca, 0x00cb, 119 0x00c8, 0x0131, 0x00cd, 0x00ce, 0x00cf, 0x2518, 120 0x250c, 0x2588, 0x2584, 0x00a6, 0x00cc, 0x2580, 121 0x00d3, 0x00df, 0x00d4, 0x00d2, 0x00f5, 0x00d5, 122 0x00b5, 0x00fe, 0x00de, 0x00da, 0x00db, 0x00d9, 123 0x00fd, 0x00dd, 0x00af, 0x00b4, 0x00ad, 0x00b1, 124 0x2017, 0x00be, 0x00b6, 0x00a7, 0x00f7, 0x00b8, 125 0x00b0, 0x00a8, 0x00b7, 0x00b9, 0x00b3, 0x00b2, 126 0x25a0, 0x00a0 }; 127 128 SimpleEncodingHolder cp850 = new SimpleEncodingHolder(cp850_high_chars); 129 130 simpleEncodings.put("CP850",cp850); 131 simpleEncodings.put("Cp850",cp850); 132 simpleEncodings.put("cp850",cp850); 133 simpleEncodings.put("IBM850",cp850); 134 simpleEncodings.put("ibm850",cp850); 135 } 136 137 /** 138 * Grow a byte buffer, so it has a minimal capacity or at least 139 * the double capacity of the original buffer 140 * 141 * @param b The original buffer. 142 * @param newCapacity The minimal requested new capacity. 143 * @return A byte buffer <code>r</code> with 144 * <code>r.capacity() = max(b.capacity()*2,newCapacity)</code> and 145 * all the data contained in <code>b</code> copied to the beginning 146 * of <code>r</code>. 147 * 148 */ 149 static ByteBuffer growBuffer(ByteBuffer b, int newCapacity) { 150 b.limit(b.position()); 151 b.rewind(); 152 153 int c2 = b.capacity() * 2; 154 ByteBuffer on = ByteBuffer.allocate(c2 < newCapacity ? newCapacity : c2); 155 156 on.put(b); 157 return on; 158 } 159 160 161 /** 162 * The hexadecimal digits <code>0,...,9,A,...,F</code> encoded as 163 * ASCII bytes. 164 */ 165 private static final byte[] HEX_DIGITS = 166 new byte [] { 167 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x41, 168 0x42, 0x43, 0x44, 0x45, 0x46 169 }; 170 171 /** 172 * Append <code>%Uxxxx</code> to the given byte buffer. 173 * The caller must assure, that <code>bb.remaining()>=6</code>. 174 * 175 * @param bb The byte buffer to write to. 176 * @param c The character to write. 177 */ 178 static void appendSurrogate(ByteBuffer bb, char c) { 179 180 bb.put((byte) '%'); 181 bb.put((byte) 'U'); 182 183 bb.put(HEX_DIGITS[(c >> 12)&0x0f]); 184 bb.put(HEX_DIGITS[(c >> 8)&0x0f]); 185 bb.put(HEX_DIGITS[(c >> 4)&0x0f]); 186 bb.put(HEX_DIGITS[c & 0x0f]); 187 } 188 189 190 /** 191 * name of the encoding UTF-8 192 */ 193 static final String UTF8 = "UTF8"; 194 195 /** 196 * variant name of the encoding UTF-8 used for comparisions. 197 */ 198 private static final String UTF_DASH_8 = CharsetNames.UTF_8; 199 200 /** 201 * name of the encoding UTF-8 202 */ 203 static final ZipEncoding UTF8_ZIP_ENCODING = new FallbackZipEncoding(UTF8); 204 205 /** 206 * Instantiates a zip encoding. 207 * 208 * @param name The name of the zip encoding. Specify {@code null} for 209 * the platform's default encoding. 210 * @return A zip encoding for the given encoding name. 211 */ 212 public static ZipEncoding getZipEncoding(String name) { 213 214 // fallback encoding is good enough for utf-8. 215 if (isUTF8(name)) { 216 return UTF8_ZIP_ENCODING; 217 } 218 219 if (name == null) { 220 return new FallbackZipEncoding(); 221 } 222 223 SimpleEncodingHolder h = simpleEncodings.get(name); 224 225 if (h!=null) { 226 return h.getEncoding(); 227 } 228 229 try { 230 231 Charset cs = Charset.forName(name); 232 return new NioZipEncoding(cs); 233 234 } catch (UnsupportedCharsetException e) { 235 return new FallbackZipEncoding(name); 236 } 237 } 238 239 /** 240 * Whether a given encoding - or the platform's default encoding 241 * if the parameter is null - is UTF-8. 242 */ 243 static boolean isUTF8(String encoding) { 244 if (encoding == null) { 245 // check platform's default encoding 246 encoding = System.getProperty("file.encoding"); 247 } 248 return UTF8.equalsIgnoreCase(encoding) 249 || UTF_DASH_8.equalsIgnoreCase(encoding); 250 } 251 }