001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     * http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing,
013     * software distributed under the License is distributed on an
014     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015     * KIND, either express or implied.  See the License for the
016     * specific language governing permissions and limitations
017     * under the License.
018     */
019    package org.apache.commons.compress.compressors;
020    
021    import java.util.Collections;
022    import java.util.HashMap;
023    import java.util.Locale;
024    import java.util.Map;
025    
/**
 * File name mapping code for the compression formats.
 * <p>
 * An instance is configured once with a table of "compressed suffix to
 * uncompressed suffix" mappings and never changes afterwards: the table
 * passed to the constructor is copied, so later modifications of the
 * caller's map do not affect this object.
 * @ThreadSafe
 * @since 1.4
 */
public class FileNameUtil {

    /**
     * Map from common filename suffixes to the suffixes that identify compressed
     * versions of those file types. For example: from ".tar" to ".tgz".
     * Populated only inside the constructor.
     */
    private final Map<String, String> compressSuffix =
        new HashMap<String, String>();

    /**
     * Map from common filename suffixes of compressed files to the
     * corresponding suffixes of uncompressed files. For example: from
     * ".tgz" to ".tar".
     * <p>
     * This map also contains format-specific suffixes like ".gz" and "-z".
     * These suffixes are mapped to the empty string, as they should simply
     * be removed from the filename when the file is uncompressed.
     * <p>
     * This is an unmodifiable private copy of the constructor argument.
     */
    private final Map<String, String> uncompressSuffix;

    /**
     * Length of the longest compressed suffix.
     */
    private final int longestCompressedSuffix;

    /**
     * Length of the shortest compressed suffix.
     */
    private final int shortestCompressedSuffix;

    /**
     * Length of the longest uncompressed suffix.
     */
    private final int longestUncompressedSuffix;

    /**
     * Length of the shortest uncompressed suffix longer than the
     * empty string.
     */
    private final int shortestUncompressedSuffix;

    /**
     * The format's default extension.
     */
    private final String defaultExtension;

    /**
     * Sets up the utility with a map of known compressed to
     * uncompressed suffix mappings and the default extension of the
     * format.
     * <p>
     * The entries of the given map are copied. When several compressed
     * suffixes map to the same uncompressed suffix, the one encountered
     * first while iterating the given map is used for the reverse
     * (uncompressed to compressed) mapping, so callers that care about
     * that choice should pass a map with a defined iteration order.
     * <p>
     * File names are lower-cased before lookup, so only lower-case keys
     * can ever match.
     *
     * @param uncompressSuffix Map from common filename suffixes of
     * compressed files to the corresponding suffixes of uncompressed
     * files. For example: from ".tgz" to ".tar".  This map also
     * contains format-specific suffixes like ".gz" and "-z".  These
     * suffixes are mapped to the empty string, as they should simply
     * be removed from the filename when the file is uncompressed.
     * Keys and values must not be {@code null}.
     *
     * @param defaultExtension the format's default extension like ".gz"
     */
    public FileNameUtil(Map<String, String> uncompressSuffix,
                        String defaultExtension) {
        final Map<String, String> copy = new HashMap<String, String>();
        int longestCompressed = Integer.MIN_VALUE;
        int shortestCompressed = Integer.MAX_VALUE;
        int longestUncompressed = Integer.MIN_VALUE;
        int shortestUncompressed = Integer.MAX_VALUE;

        // Iterate the caller's map (not the copy) so that its iteration
        // order decides which compressed suffix wins the reverse mapping.
        for (Map.Entry<String, String> ent : uncompressSuffix.entrySet()) {
            final String compressed = ent.getKey();
            final String uncompressed = ent.getValue();
            copy.put(compressed, uncompressed);

            final int compressedLength = compressed.length();
            longestCompressed = Math.max(longestCompressed, compressedLength);
            shortestCompressed = Math.min(shortestCompressed, compressedLength);

            // Empty values mean "just strip the suffix"; they have no
            // reverse mapping and do not count towards the length bounds.
            final int uncompressedLength = uncompressed.length();
            if (uncompressedLength > 0) {
                if (!compressSuffix.containsKey(uncompressed)) {
                    compressSuffix.put(uncompressed, compressed);
                }
                longestUncompressed =
                    Math.max(longestUncompressed, uncompressedLength);
                shortestUncompressed =
                    Math.min(shortestUncompressed, uncompressedLength);
            }
        }

        this.uncompressSuffix = Collections.unmodifiableMap(copy);
        this.longestCompressedSuffix = longestCompressed;
        this.longestUncompressedSuffix = longestUncompressed;
        this.shortestCompressedSuffix = shortestCompressed;
        this.shortestUncompressedSuffix = shortestUncompressed;
        this.defaultExtension = defaultExtension;
    }

    /**
     * Detects common format suffixes in the given filename.
     * <p>
     * The comparison ignores case. A name that consists of nothing but
     * a suffix (for example ".gz") is not considered compressed.
     *
     * @param filename name of a file
     * @return {@code true} if the filename has a common format suffix,
     *         {@code false} otherwise
     */
    public boolean isCompressedFilename(String filename) {
        return findSuffix(filename, uncompressSuffix,
                          shortestCompressedSuffix,
                          longestCompressedSuffix) != null;
    }

    /**
     * Maps the given name of a compressed file to the name that the
     * file should have after uncompression. Commonly used file type specific
     * suffixes like ".tgz" or ".svgz" are automatically detected and
     * correctly mapped. For example the name "package.tgz" is mapped to
     * "package.tar". And any filenames with the generic ".gz" suffix
     * (or any other generic gzip suffix) is mapped to a name without that
     * suffix. If no format suffix is detected, then the filename is returned
     * unmapped.
     * <p>
     * The suffix is matched ignoring case; the remainder of the name keeps
     * its original case.
     *
     * @param filename name of a file
     * @return name of the corresponding uncompressed file
     */
    public String getUncompressedFilename(String filename) {
        final String matched = findSuffix(filename, uncompressSuffix,
                                          shortestCompressedSuffix,
                                          longestCompressedSuffix);
        if (matched == null) {
            return filename;
        }
        return replaceSuffix(filename, matched.length(),
                             uncompressSuffix.get(matched));
    }

    /**
     * Maps the given filename to the name that the file should have after
     * compression. Common file types with custom suffixes for
     * compressed versions are automatically detected and correctly mapped.
     * For example the name "package.tar" is mapped to "package.tgz". If no
     * custom mapping is applicable, then the format's default extension
     * (as given to the constructor) is appended to the filename.
     * <p>
     * The suffix is matched ignoring case; the remainder of the name keeps
     * its original case.
     *
     * @param filename name of a file
     * @return name of the corresponding compressed file
     */
    public String getCompressedFilename(String filename) {
        final String matched = findSuffix(filename, compressSuffix,
                                          shortestUncompressedSuffix,
                                          longestUncompressedSuffix);
        if (matched == null) {
            // No custom suffix found, just append the default
            return filename + defaultExtension;
        }
        return replaceSuffix(filename, matched.length(),
                             compressSuffix.get(matched));
    }

    /**
     * Looks for the shortest known suffix at the end of the lower-cased
     * filename.
     *
     * @param filename name of a file
     * @param table suffix table whose keys are the candidate suffixes
     * @param shortest length of the shortest candidate suffix
     * @param longest length of the longest candidate suffix
     * @return the matching key of {@code table}, or {@code null} if the
     *         name does not end in any candidate suffix or would be
     *         empty once the suffix is removed
     */
    private static String findSuffix(String filename,
                                     Map<String, String> table,
                                     int shortest, int longest) {
        final String lower = filename.toLowerCase(Locale.ENGLISH);
        final int lowerLength = lower.length();
        // Lower-casing may change the length of a string (U+0130 becomes
        // two chars), so bound the scan by the original name as well:
        // the suffix is later cut from the original, not from the copy.
        final int limit = Math.min(lowerLength, filename.length());
        for (int i = shortest; i <= longest && i < limit; i++) {
            final String candidate = lower.substring(lowerLength - i);
            if (table.containsKey(candidate)) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Replaces the last {@code suffixLength} chars of the filename.
     *
     * @param filename name of a file, at least {@code suffixLength} long
     * @param suffixLength number of trailing chars to drop
     * @param replacement text to append in their place
     * @return the filename with its tail replaced
     */
    private static String replaceSuffix(String filename, int suffixLength,
                                        String replacement) {
        // Index into the original name using its own length; the length of
        // the lower-cased copy is not a valid offset into the original.
        return filename.substring(0, filename.length() - suffixLength)
            + replacement;
    }

}