001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     * http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing,
013     * software distributed under the License is distributed on an
014     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015     * KIND, either express or implied.  See the License for the
016     * specific language governing permissions and limitations
017     * under the License.
018     */
019    
020    package org.apache.commons.compress.archivers.zip;
021    
022    import java.io.IOException;
023    import java.nio.ByteBuffer;
024    
/**
 * An interface for encoders that do a pretty encoding of ZIP
 * filenames.
 *
 * <p>There are mostly two implementations, one that uses java.nio
 * {@link java.nio.charset.Charset Charset} and one implementation,
 * which copes with simple 8 bit charsets, because java-1.4 did not
 * support Cp437 in java.nio.</p>
 *
 * <p>The main reason for defining a separate encoding layer comes from
 * the problems with {@link java.lang.String#getBytes(String)
 * String.getBytes}, which encodes unknown characters as ASCII
 * question marks ('?'). Question marks are by definition invalid in
 * filenames on some operating systems like Windows, which
 * leads to ignored ZIP entries.</p>
 *
 * <p>All implementations should implement this interface in a
 * reentrant way.</p>
 */
public interface ZipEncoding {

    /**
     * Checks whether the given string may be losslessly encoded using
     * this encoding.
     *
     * @param name A filename or ZIP comment.
     * @return Whether the given name may be encoded without any losses.
     */
    boolean canEncode(String name);

    /**
     * Encodes a filename or a comment to a byte array suitable for
     * storing it to a serialized zip entry.
     *
     * <p>Examples for CP 437 (in pseudo-notation, right hand side is
     * C-style notation):</p>
     * <pre>
     *  encode("\u20AC_for_Dollar.txt") = "%U20AC_for_Dollar.txt"
     *  encode("\u00D6lf\u00E4sser.txt") = "\231lf\204sser.txt"
     * </pre>
     *
     * @param name A filename or ZIP comment.
     * @return A byte buffer with a backing array containing the
     *         encoded name.  Unmappable characters or malformed
     *         character sequences are mapped to a sequence of UTF-16
     *         words encoded in the format <code>%Uxxxx</code>.  It is
     *         assumed that the byte buffer is positioned at the
     *         beginning of the encoded result, that the byte buffer
     *         has a backing array and that the limit of the byte
     *         buffer points to the end of the encoded result.
     * @throws IOException on error
     */
    ByteBuffer encode(String name) throws IOException;

    /**
     * Decodes the given byte values to a string.
     *
     * @param data The byte values to decode.
     * @return The decoded string.
     * @throws IOException on error
     */
    String decode(byte[] data) throws IOException;
}