001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *  http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing,
013     * software distributed under the License is distributed on an
014     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015     * KIND, either express or implied.  See the License for the
016     * specific language governing permissions and limitations
017     * under the License.
018     */
019    
020    package org.apache.geronimo.mail.util;
021    
022    import java.io.EOFException;
023    import java.io.IOException;
024    import java.io.InputStream;
025    import java.io.OutputStream;
026    import java.io.PrintStream;
027    import java.io.PushbackInputStream;
028    import java.io.UnsupportedEncodingException;
029    
030    /**
031     * @version $Rev: 467553 $ $Date: 2006-10-25 06:01:51 +0200 (Mi, 25. Okt 2006) $
032     */
033    public class QuotedPrintableEncoder implements Encoder {
034    
035        static protected final byte[] encodingTable =
036        {
037            (byte)'0', (byte)'1', (byte)'2', (byte)'3', (byte)'4', (byte)'5', (byte)'6', (byte)'7',
038            (byte)'8', (byte)'9', (byte)'A', (byte)'B', (byte)'C', (byte)'D', (byte)'E', (byte)'F'
039        };
040    
041        /*
042         * set up the decoding table.
043         */
044        static protected final byte[] decodingTable = new byte[128];
045    
046        static {
047            // initialize the decoding table
048            for (int i = 0; i < encodingTable.length; i++)
049            {
050                decodingTable[encodingTable[i]] = (byte)i;
051            }
052        }
053    
054    
055        // default number of characters we will write per line.
056        static private final int DEFAULT_CHARS_PER_LINE = 76;
057    
058        // the output stream we're wrapped around
059        protected OutputStream out;
060        // the number of bytes written;
061        protected int bytesWritten = 0;
062        // number of bytes written on the current line
063        protected int lineCount = 0;
064        // line length we're dealing with
065        protected int lineLength;
066        // number of deferred whitespace characters in decode mode.
067        protected int deferredWhitespace = 0;
068    
069        protected int cachedCharacter = -1;
070    
071        // indicates whether the last character was a '\r', potentially part of a CRLF sequence.
072        protected boolean lastCR = false;
073        // remember whether last character was a white space.
074        protected boolean lastWhitespace = false;
075    
076        public QuotedPrintableEncoder() {
077            this(null, DEFAULT_CHARS_PER_LINE);
078        }
079    
080        public QuotedPrintableEncoder(OutputStream out) {
081            this(out, DEFAULT_CHARS_PER_LINE);
082        }
083    
084        public QuotedPrintableEncoder(OutputStream out, int lineLength) {
085            this.out = out;
086            this.lineLength = lineLength;
087        }
088    
089        private void checkDeferred(int ch) throws IOException {
090            // was the last character we looked at a whitespace?  Try to decide what to do with it now.
091            if (lastWhitespace) {
092                // if this whitespace is at the end of the line, write it out encoded
093                if (ch == '\r' || ch == '\n') {
094                    writeEncodedCharacter(' ');
095                }
096                else {
097                    // we can write this out without encoding.
098                    writeCharacter(' ');
099                }
100                // we always turn this off.
101                lastWhitespace = false;
102            }
103            // deferred carriage return?
104            else if (lastCR) {
105                // if the char following the CR was not a new line, write an EOL now.
106                if (ch != '\n') {
107                    writeEOL();
108                }
109                // we always turn this off too
110                lastCR = false;
111            }
112        }
113    
114    
115        /**
116         * encode the input data producing a UUEncoded output stream.
117         *
118         * @param data   The array of byte data.
119         * @param off    The starting offset within the data.
120         * @param length Length of the data to encode.
121         *
122         * @return the number of bytes produced.
123         */
124        public int encode(byte[] data, int off, int length) throws IOException {
125            int endOffset = off + length;
126    
127            while (off < endOffset) {
128                // get the character
129                byte ch = data[off++];
130    
131                // handle the encoding of this character.
132                encode(ch);
133            }
134    
135            return bytesWritten;
136        }
137    
138    
139        public void encode(int ch) throws IOException {
140            // make sure this is just a single byte value.
141            ch = ch &0xFF;
142    
143            // see if we had to defer handling of a whitespace or '\r' character, and handle it if necessary.
144            checkDeferred(ch);
145            // different characters require special handling.
146            switch (ch) {
147                // spaces require special handling.  If the next character is a line terminator, then
148                // the space needs to be encoded.
149                case ' ':
150                {
151                    // at this point, we don't know whether this needs encoding or not.  If the next
152                    // character is a linend, it gets encoded.  If anything else, we just write it as is.
153                    lastWhitespace = true;
154                    // turn off any CR flags.
155                    lastCR = false;
156                    break;
157                }
158    
159                // carriage return, which may be part of a CRLF sequence.
160                case '\r':
161                {
162                    // just flag this until we see the next character.
163                    lastCR = true;
164                    break;
165                }
166    
167                // a new line character...we need to check to see if it was paired up with a '\r' char.
168                case '\n':
169                {
170                    // we always write this out for a newline.  We defer CRs until we see if the LF follows.
171                    writeEOL();
172                    break;
173                }
174    
175                // an '=' is the escape character for an encoded character, so it must also
176                // be written encoded.
177                case '=':
178                {
179                    writeEncodedCharacter(ch);
180                    break;
181                }
182    
183                // all other characters.  If outside the printable character range, write it encoded.
184                default:
185                {
186                    if (ch < 32 || ch >= 127) {
187                        writeEncodedCharacter(ch);
188                    }
189                    else {
190                        writeCharacter(ch);
191                    }
192                    break;
193                }
194            }
195        }
196    
197    
198        /**
199         * encode the input data producing a UUEncoded output stream.
200         *
201         * @param data   The array of byte data.
202         * @param off    The starting offset within the data.
203         * @param length Length of the data to encode.
204         *
205         * @return the number of bytes produced.
206         */
207        public int encode(byte[] data, int off, int length, String specials) throws IOException {
208            int endOffset = off + length;
209    
210            while (off < endOffset) {
211                // get the character
212                byte ch = data[off++];
213    
214                // handle the encoding of this character.
215                encode(ch, specials);
216            }
217    
218            return bytesWritten;
219        }
220    
221    
222        /**
223         * encode the input data producing a UUEncoded output stream.
224         *
225         * @param data   The array of byte data.
226         * @param off    The starting offset within the data.
227         * @param length Length of the data to encode.
228         *
229         * @return the number of bytes produced.
230         */
231        public int encode(PushbackInputStream in, StringBuffer out, String specials, int limit) throws IOException {
232            int count = 0;
233    
234            while (count < limit) {
235                int ch = in.read();
236    
237                if (ch == -1) {
238                    return count;
239                }
240                // make sure this is just a single byte value.
241                ch = ch &0xFF;
242    
243                // spaces require special handling.  If the next character is a line terminator, then
244                // the space needs to be encoded.
245                if (ch == ' ') {
246                    // blanks get translated into underscores, because the encoded tokens can't have embedded blanks.
247                    out.append('_');
248                    count++;
249                }
250                // non-ascii chars and the designated specials all get encoded.
251                else if (ch < 32 || ch >= 127 || specials.indexOf(ch) != -1) {
252                    // we need at least 3 characters to write this out, so we need to
253                    // forget we saw this one and try in the next segment.
254                    if (count + 3 > limit) {
255                        in.unread(ch);
256                        return count;
257                    }
258                    out.append('=');
259                    out.append((char)encodingTable[ch >> 4]);
260                    out.append((char)encodingTable[ch & 0x0F]);
261                    count += 3;
262                }
263                else {
264                    // good character, just use unchanged.
265                    out.append((char)ch);
266                    count++;
267                }
268            }
269            return count;
270        }
271    
272    
273        /**
274         * Specialized version of the decoder that handles encoding of
275         * RFC 2047 encoded word values.  This has special handling for
276         * certain characters, but less special handling for blanks and
277         * linebreaks.
278         *
279         * @param ch
280         * @param specials
281         *
282         * @exception IOException
283         */
284        public void encode(int ch, String specials) throws IOException {
285            // make sure this is just a single byte value.
286            ch = ch &0xFF;
287    
288            // spaces require special handling.  If the next character is a line terminator, then
289            // the space needs to be encoded.
290            if (ch == ' ') {
291                // blanks get translated into underscores, because the encoded tokens can't have embedded blanks.
292                writeCharacter('_');
293            }
294            // non-ascii chars and the designated specials all get encoded.
295            else if (ch < 32 || ch >= 127 || specials.indexOf(ch) != -1) {
296                writeEncodedCharacter(ch);
297            }
298            else {
299                // good character, just use unchanged.
300                writeCharacter(ch);
301            }
302        }
303    
304    
305        /**
306         * encode the input data producing a UUEncoded output stream.
307         *
308         * @param data   The array of byte data.
309         * @param off    The starting offset within the data.
310         * @param length Length of the data to encode.
311         * @param out    The output stream the encoded data is written to.
312         *
313         * @return the number of bytes produced.
314         */
315        public int encode(byte[] data, int off, int length, OutputStream out) throws IOException {
316            // make sure we're writing to the correct stream
317            this.out = out;
318            bytesWritten = 0;
319    
320            // do the actual encoding
321            return encode(data, off, length);
322        }
323    
324    
325        /**
326         * decode the uuencoded byte data writing it to the given output stream
327         *
328         * @param data   The array of byte data to decode.
329         * @param off    Starting offset within the array.
330         * @param length The length of data to encode.
331         * @param out    The output stream used to return the decoded data.
332         *
333         * @return the number of bytes produced.
334         * @exception IOException
335         */
336        public int decode(byte[] data, int off, int length, OutputStream out) throws IOException {
337            // make sure we're writing to the correct stream
338            this.out = out;
339    
340            int endOffset = off + length;
341            int bytesWritten = 0;
342    
343            while (off < endOffset) {
344                byte ch = data[off++];
345    
346                // space characters are a pain.  We need to scan ahead until we find a non-space character.
347                // if the character is a line terminator, we need to discard the blanks.
348                if (ch == ' ') {
349                    int trailingSpaces = 1;
350                    // scan forward, counting the characters.
351                    while (off < endOffset && data[off] == ' ') {
352                        // step forward and count this.
353                        off++;
354                        trailingSpaces++;
355                    }
356                    // is this a lineend at the current location?
357                    if (off >= endOffset || data[off] == '\r' || data[off] == '\n') {
358                        // go to the next one
359                        continue;
360                    }
361                    else {
362                        // make sure we account for the spaces in the output count.
363                        bytesWritten += trailingSpaces;
364                        // write out the blank characters we counted and continue with the non-blank.
365                        while (trailingSpaces-- > 0) {
366                            out.write(' ');
367                        }
368                    }
369                }
370                else if (ch == '=') {
371                    // we found an encoded character.  Reduce the 3 char sequence to one.
372                    // but first, make sure we have two characters to work with.
373                    if (off + 1 >= endOffset) {
374                        throw new IOException("Invalid quoted printable encoding");
375                    }
376                    // convert the two bytes back from hex.
377                    byte b1 = data[off++];
378                    byte b2 = data[off++];
379    
380                    // we've found an encoded carriage return.  The next char needs to be a newline
381                    if (b1 == '\r') {
382                        if (b2 != '\n') {
383                            throw new IOException("Invalid quoted printable encoding");
384                        }
385                        // this was a soft linebreak inserted by the encoding.  We just toss this away
386                        // on decode.
387                    }
388                    else {
389                        // this is a hex pair we need to convert back to a single byte.
390                        b1 = decodingTable[b1];
391                        b2 = decodingTable[b2];
392                        out.write((b1 << 4) | b2);
393                        // 3 bytes in, one byte out
394                        bytesWritten++;
395                    }
396                }
397                else {
398                    // simple character, just write it out.
399                    out.write(ch);
400                    bytesWritten++;
401                }
402            }
403    
404            return bytesWritten;
405        }
406    
407        /**
408         * Decode a byte array of data.
409         *
410         * @param data   The data array.
411         * @param out    The output stream target for the decoded data.
412         *
413         * @return The number of bytes written to the stream.
414         * @exception IOException
415         */
416        public int decodeWord(byte[] data, OutputStream out) throws IOException {
417            return decodeWord(data, 0, data.length, out);
418        }
419    
420    
421        /**
422         * decode the uuencoded byte data writing it to the given output stream
423         *
424         * @param data   The array of byte data to decode.
425         * @param off    Starting offset within the array.
426         * @param length The length of data to encode.
427         * @param out    The output stream used to return the decoded data.
428         *
429         * @return the number of bytes produced.
430         * @exception IOException
431         */
432        public int decodeWord(byte[] data, int off, int length, OutputStream out) throws IOException {
433            // make sure we're writing to the correct stream
434            this.out = out;
435    
436            int endOffset = off + length;
437            int bytesWritten = 0;
438    
439            while (off < endOffset) {
440                byte ch = data[off++];
441    
442                // space characters were translated to '_' on encode, so we need to translate them back.
443                if (ch == '_') {
444                    out.write(' ');
445                }
446                else if (ch == '=') {
447                    // we found an encoded character.  Reduce the 3 char sequence to one.
448                    // but first, make sure we have two characters to work with.
449                    if (off + 1 >= endOffset) {
450                        throw new IOException("Invalid quoted printable encoding");
451                    }
452                    // convert the two bytes back from hex.
453                    byte b1 = data[off++];
454                    byte b2 = data[off++];
455    
456                    // we've found an encoded carriage return.  The next char needs to be a newline
457                    if (b1 == '\r') {
458                        if (b2 != '\n') {
459                            throw new IOException("Invalid quoted printable encoding");
460                        }
461                        // this was a soft linebreak inserted by the encoding.  We just toss this away
462                        // on decode.
463                    }
464                    else {
465                        // this is a hex pair we need to convert back to a single byte.
466                        byte c1 = decodingTable[b1];
467                        byte c2 = decodingTable[b2];
468                        out.write((c1 << 4) | c2);
469                        // 3 bytes in, one byte out
470                        bytesWritten++;
471                    }
472                }
473                else {
474                    // simple character, just write it out.
475                    out.write(ch);
476                    bytesWritten++;
477                }
478            }
479    
480            return bytesWritten;
481        }
482    
483    
484        /**
485         * decode the UUEncoded String data writing it to the given output stream.
486         *
487         * @param data   The String data to decode.
488         * @param out    The output stream to write the decoded data to.
489         *
490         * @return the number of bytes produced.
491         * @exception IOException
492         */
493        public int decode(String data, OutputStream out) throws IOException {
494            try {
495                // just get the byte data and decode.
496                byte[] bytes = data.getBytes("US-ASCII");
497                return decode(bytes, 0, bytes.length, out);
498            } catch (UnsupportedEncodingException e) {
499                throw new IOException("Invalid UUEncoding");
500            }
501        }
502    
503        private void checkLineLength(int required) throws IOException {
504            // if we're at our line length limit, write out a soft line break and reset.
505            if ((lineCount + required) > lineLength ) {
506                out.write('=');
507                out.write('\r');
508                out.write('\n');
509                bytesWritten += 3;
510                lineCount = 0;
511            }
512        }
513    
514    
515        public void writeEncodedCharacter(int ch) throws IOException {
516            // we need 3 characters for an encoded value
517            checkLineLength(3);
518            out.write('=');
519            out.write(encodingTable[ch >> 4]);
520            out.write(encodingTable[ch & 0x0F]);
521            lineCount += 3;
522            bytesWritten += 3;
523        }
524    
525    
526        public void writeCharacter(int ch) throws IOException {
527            // we need 3 characters for an encoded value
528            checkLineLength(1);
529            out.write(ch);
530            lineCount++;
531            bytesWritten++;
532        }
533    
534    
535        public void writeEOL() throws IOException {
536            out.write('\r');
537            out.write('\n');
538            lineCount = 0;
539            bytesWritten += 3;
540        }
541    
542    
543        public int decode(InputStream in) throws IOException {
544    
545            // we potentially need to scan over spans of whitespace characters to determine if they're real
546            // we just return blanks until the count goes to zero.
547            if (deferredWhitespace > 0) {
548                deferredWhitespace--;
549                return ' ';
550            }
551    
552            // we may have needed to scan ahead to find the first non-blank character, which we would store here.
553            // hand that back once we're done with the blanks.
554            if (cachedCharacter != -1) {
555                int result = cachedCharacter;
556                cachedCharacter = -1;
557                return result;
558            }
559    
560            int ch = in.read();
561    
562            // reflect back an EOF condition.
563            if (ch == -1) {
564                return -1;
565            }
566    
567            // space characters are a pain.  We need to scan ahead until we find a non-space character.
568            // if the character is a line terminator, we need to discard the blanks.
569            if (ch == ' ') {
570                // scan forward, counting the characters.
571                while ((ch = in.read()) == ' ') {
572                    deferredWhitespace++;
573                }
574    
575                // is this a lineend at the current location?
576                if (ch == -1 || ch == '\r' || ch == '\n') {
577                    // those blanks we so zealously counted up don't really exist.  Clear out the counter.
578                    deferredWhitespace = 0;
579                    // return the real significant character now.
580                    return ch;
581                }
582                else {
583                // remember this character for later, after we've used up the deferred blanks.
584                    cachedCharacter = ch;
585                    // return this space.  We did not include this one in the deferred count, so we're right in sync.
586                    return ' ';
587                }
588            }
589            else if (ch == '=') {
590                int b1 = in.read();
591                // we need to get two characters after the quotation marker
592                if (b1 == -1) {
593                    throw new IOException("Truncated quoted printable data");
594                }
595                int b2 = in.read();
596                // we need to get two characters after the quotation marker
597                if (b2 == -1) {
598                    throw new IOException("Truncated quoted printable data");
599                }
600    
601                // we've found an encoded carriage return.  The next char needs to be a newline
602                if (b1 == '\r') {
603                    if (b2 != '\n') {
604                        throw new IOException("Invalid quoted printable encoding");
605                    }
606                    // this was a soft linebreak inserted by the encoding.  We just toss this away
607                    // on decode.  We need to return something, so recurse and decode the next.
608                    return decode(in);
609                }
610                else {
611                    // this is a hex pair we need to convert back to a single byte.
612                    b1 = decodingTable[b1];
613                    b2 = decodingTable[b2];
614                    return (b1 << 4) | b2;
615                }
616            }
617            else {
618                return ch;
619            }
620        }
621    
622    
623        /**
624         * Perform RFC-2047 word encoding using Q-P data encoding.
625         *
626         * @param in       The source for the encoded data.
627         * @param charset  The charset tag to be added to each encoded data section.
628         * @param specials The set of special characters that we require to encoded.
629         * @param out      The output stream where the encoded data is to be written.
630         * @param fold     Controls whether separate sections of encoded data are separated by
631         *                 linebreaks or whitespace.
632         *
633         * @exception IOException
634         */
635        public void encodeWord(InputStream in, String charset, String specials, OutputStream out, boolean fold) throws IOException
636        {
637            // we need to scan ahead in a few places, which may require pushing characters back on to the stream.
638            // make sure we have a stream where this is possible.
639            PushbackInputStream inStream = new PushbackInputStream(in);
640            PrintStream writer = new PrintStream(out);
641    
642            // segments of encoded data are limited to 76 byes, including the control sections.
643            int limit = 76 - 7 - charset.length();
644            boolean firstLine = true;
645            StringBuffer encodedString = new StringBuffer(76);
646    
647            while (true) {
648    
649                // encode another segment of data.
650                encode(inStream, encodedString, specials, limit);
651                // nothing encoded means we've hit the end of the data.
652                if (encodedString.length() == 0) {
653                    break;
654                }
655                // if we have more than one segment, we need to insert separators.  Depending on whether folding
656                // was requested, this is either a blank or a linebreak.
657                if (!firstLine) {
658                    if (fold) {
659                        writer.print("\r\n");
660                    }
661                    else {
662                        writer.print(" ");
663                    }
664                }
665    
666                // add the encoded word header
667                writer.print("=?");
668                writer.print(charset);
669                writer.print("?Q?");
670                // the data
671                writer.print(encodedString.toString());
672                // and the terminator mark
673                writer.print("?=");
674                writer.flush();
675    
676                // we reset the string buffer and reuse it.
677                encodedString.setLength(0);
678            }
679        }
680    }
681    
682    
683