/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.geronimo.mail.util;

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.io.PushbackInputStream;
import java.io.UnsupportedEncodingException;

/**
 * Encoder/decoder for the quoted-printable transfer encoding, plus the "Q"
 * variant used for RFC 2047 encoded words.
 *
 * <p>The encoder is stateful: a blank or a carriage return is not written
 * when it is seen, but deferred until the following character shows whether
 * the blank ends a line (and must be escaped) or the CR is part of a CRLF
 * pair.  This class has no method that flushes that pending state, so a blank
 * or CR that is the very last character handed to <code>encode</code> stays
 * pending until another character is encoded.</p>
 *
 * @version $Rev: 467553 $ $Date: 2006-10-25 06:01:51 +0200 (Mi, 25. Okt 2006) $
 */
public class QuotedPrintableEncoder implements Encoder {

    /** Hex digits used when writing an escaped (=XX) character. */
    static protected final byte[] encodingTable =
        {
            (byte)'0', (byte)'1', (byte)'2', (byte)'3', (byte)'4', (byte)'5', (byte)'6', (byte)'7',
            (byte)'8', (byte)'9', (byte)'A', (byte)'B', (byte)'C', (byte)'D', (byte)'E', (byte)'F'
        };

    /*
     * set up the decoding table.  Entries for characters that are not hex digits are 0,
     * so the table alone cannot tell '0' from an invalid character; the decode methods
     * use hexValue() for validated conversion.  The table is kept because it is part of
     * the protected surface of this class.
     */
    static protected final byte[] decodingTable = new byte[128];

    static {
        // initialize the decoding table
        for (int i = 0; i < encodingTable.length; i++)
        {
            decodingTable[encodingTable[i]] = (byte)i;
        }
        // also map the lowercase forms of A-F so the table agrees with hexValue().
        for (int i = 10; i < encodingTable.length; i++)
        {
            decodingTable[encodingTable[i] - 'A' + 'a'] = (byte)i;
        }
    }


    // default number of characters we will write per line.
    static private final int DEFAULT_CHARS_PER_LINE = 76;

    // message used for every malformed-escape failure.
    static private final String INVALID_ENCODING = "Invalid quoted printable encoding";

    // the output stream we're wrapped around
    protected OutputStream out;
    // the number of bytes written;
    protected int bytesWritten = 0;
    // number of bytes written on the current line
    protected int lineCount = 0;
    // line length we're dealing with
    protected int lineLength;
    // number of deferred whitespace characters in decode mode.
    protected int deferredWhitespace = 0;
    // character read while scanning past blanks in decode mode; -1 when nothing is cached.
    protected int cachedCharacter = -1;

    // indicates whether the last character was a '\r', potentially part of a CRLF sequence.
    protected boolean lastCR = false;
    // remember whether last character was a white space.
    protected boolean lastWhitespace = false;

    /**
     * Create an encoder with no output stream (the stream is left null) and the
     * default line length.
     */
    public QuotedPrintableEncoder() {
        this(null, DEFAULT_CHARS_PER_LINE);
    }

    /**
     * Create an encoder writing to the given stream with the default line length.
     *
     * @param out    The stream that receives the encoded data.
     */
    public QuotedPrintableEncoder(OutputStream out) {
        this(out, DEFAULT_CHARS_PER_LINE);
    }

    /**
     * Create an encoder writing to the given stream.
     *
     * @param out        The stream that receives the encoded data.
     * @param lineLength The line length limit used when inserting soft line breaks.
     */
    public QuotedPrintableEncoder(OutputStream out, int lineLength) {
        this.out = out;
        this.lineLength = lineLength;
    }

    /**
     * Resolve a blank or carriage return that was deferred by the previous call
     * to {@link #encode(int)}, now that the following character is known.
     *
     * @param ch     The character that follows the deferred one.
     *
     * @exception IOException
     */
    private void checkDeferred(int ch) throws IOException {
        // was the last character we looked at a whitespace?  Try to decide what to do with it now.
        if (lastWhitespace) {
            // if this whitespace is at the end of the line, write it out encoded
            if (ch == '\r' || ch == '\n') {
                writeEncodedCharacter(' ');
            }
            else {
                // we can write this out without encoding.
                writeCharacter(' ');
            }
            // we always turn this off.
            lastWhitespace = false;
        }
        // deferred carriage return?
        else if (lastCR) {
            // if the char following the CR was not a new line, write an EOL now.
            if (ch != '\n') {
                writeEOL();
            }
            // we always turn this off too
            lastCR = false;
        }
    }


    /**
     * Encode the input data as quoted-printable, writing to the current output stream.
     *
     * @param data   The array of byte data.
     * @param off    The starting offset within the data.
     * @param length Length of the data to encode.
     *
     * @return the running count of bytes written by this encoder.
     */
    public int encode(byte[] data, int off, int length) throws IOException {
        int endOffset = off + length;

        while (off < endOffset) {
            // encode(int) masks the value to a single byte, so sign extension is harmless.
            encode(data[off++]);
        }

        return bytesWritten;
    }


    /**
     * Encode a single character as quoted-printable.  Blanks and carriage returns
     * are only flagged here; they are written by the next call once the following
     * character is known.
     *
     * @param ch     The character to encode (only the low 8 bits are used).
     *
     * @exception IOException
     */
    public void encode(int ch) throws IOException {
        // make sure this is just a single byte value.
        ch = ch & 0xFF;

        // see if we had to defer handling of a whitespace or '\r' character, and handle it if necessary.
        checkDeferred(ch);
        // different characters require special handling.
        switch (ch) {
            // spaces require special handling.  If the next character is a line terminator, then
            // the space needs to be encoded.
            case ' ':
            {
                // at this point, we don't know whether this needs encoding or not.  If the next
                // character is a linend, it gets encoded.  If anything else, we just write it as is.
                lastWhitespace = true;
                // turn off any CR flags.
                lastCR = false;
                break;
            }

            // carriage return, which may be part of a CRLF sequence.
            case '\r':
            {
                // just flag this until we see the next character.
                lastCR = true;
                break;
            }

            // a new line character...we need to check to see if it was paired up with a '\r' char.
            case '\n':
            {
                // we always write this out for a newline.  We defer CRs until we see if the LF follows.
                writeEOL();
                break;
            }

            // an '=' is the escape character for an encoded character, so it must also
            // be written encoded.
            case '=':
            {
                writeEncodedCharacter(ch);
                break;
            }

            // all other characters.  If outside the printable character range, write it encoded.
            default:
            {
                if (ch < 32 || ch >= 127) {
                    writeEncodedCharacter(ch);
                }
                else {
                    writeCharacter(ch);
                }
                break;
            }
        }
    }


    /**
     * Encode the input data using the RFC 2047 "Q" rules, writing to the current
     * output stream.
     *
     * @param data     The array of byte data.
     * @param off      The starting offset within the data.
     * @param length   Length of the data to encode.
     * @param specials Characters that must be escaped in addition to the non-printable ones.
     *
     * @return the running count of bytes written by this encoder.
     */
    public int encode(byte[] data, int off, int length, String specials) throws IOException {
        int endOffset = off + length;

        while (off < endOffset) {
            // handle the encoding of this character.
            encode(data[off++], specials);
        }

        return bytesWritten;
    }


    /**
     * Encode one segment of RFC 2047 "Q" data from a stream into a string buffer.
     * Reading stops at end of input or when the next character would push the
     * segment past the limit; in the latter case an escaped character that does
     * not fit is pushed back so the next segment can pick it up.  An escaped
     * character needs three positions, so with a limit below 3 it can never fit.
     *
     * @param in       The source of the data to encode.
     * @param out      The buffer the encoded characters are appended to.
     * @param specials Characters that must be escaped in addition to the non-printable ones.
     * @param limit    The maximum number of characters to append.
     *
     * @return the number of characters appended to the buffer.
     */
    public int encode(PushbackInputStream in, StringBuffer out, String specials, int limit) throws IOException {
        int count = 0;

        while (count < limit) {
            int ch = in.read();

            if (ch == -1) {
                return count;
            }
            // make sure this is just a single byte value.
            ch = ch & 0xFF;

            if (ch == ' ') {
                // blanks get translated into underscores, because the encoded tokens can't have embedded blanks.
                out.append('_');
                count++;
            }
            // non-ascii chars and the designated specials all get encoded.
            else if (ch < 32 || ch >= 127 || specials.indexOf(ch) != -1) {
                // we need at least 3 characters to write this out, so we need to
                // forget we saw this one and try in the next segment.
                if (count + 3 > limit) {
                    in.unread(ch);
                    return count;
                }
                out.append('=');
                out.append((char)encodingTable[ch >> 4]);
                out.append((char)encodingTable[ch & 0x0F]);
                count += 3;
            }
            else {
                // good character, just use unchanged.
                out.append((char)ch);
                count++;
            }
        }
        return count;
    }


    /**
     * Specialized version of the encoder that handles encoding of
     * RFC 2047 encoded word values.  This has special handling for
     * certain characters, but less special handling for blanks and
     * linebreaks.
     *
     * @param ch       The character to encode (only the low 8 bits are used).
     * @param specials Characters that must be escaped in addition to the non-printable ones.
     *
     * @exception IOException
     */
    public void encode(int ch, String specials) throws IOException {
        // make sure this is just a single byte value.
        ch = ch & 0xFF;

        if (ch == ' ') {
            // blanks get translated into underscores, because the encoded tokens can't have embedded blanks.
            writeCharacter('_');
        }
        // non-ascii chars and the designated specials all get encoded.
        else if (ch < 32 || ch >= 127 || specials.indexOf(ch) != -1) {
            writeEncodedCharacter(ch);
        }
        else {
            // good character, just use unchanged.
            writeCharacter(ch);
        }
    }


    /**
     * Encode the input data as quoted-printable to the given stream.  The stream
     * becomes this encoder's output stream and the byte counter is reset first.
     *
     * @param data   The array of byte data.
     * @param off    The starting offset within the data.
     * @param length Length of the data to encode.
     * @param out    The output stream the encoded data is written to.
     *
     * @return the number of bytes produced.
     */
    public int encode(byte[] data, int off, int length, OutputStream out) throws IOException {
        // make sure we're writing to the correct stream
        this.out = out;
        bytesWritten = 0;

        // do the actual encoding
        return encode(data, off, length);
    }


    /**
     * Decode quoted-printable byte data, writing it to the given output stream.
     * Blanks that directly precede a line end (or the end of the data) are
     * dropped, and soft line breaks ("=" followed by CRLF) are removed.
     *
     * @param data   The array of byte data to decode.
     * @param off    Starting offset within the array.
     * @param length The length of data to decode.
     * @param out    The output stream used to return the decoded data.
     *
     * @return the number of bytes produced.
     * @exception IOException if an escape sequence is truncated or is not a
     *                        valid hex pair or soft line break.
     */
    public int decode(byte[] data, int off, int length, OutputStream out) throws IOException {
        // make sure we're writing to the correct stream
        this.out = out;

        int endOffset = off + length;
        // local count; deliberately separate from the encoder's bytesWritten field.
        int decodedCount = 0;

        while (off < endOffset) {
            byte ch = data[off++];

            // space characters are a pain.  We need to scan ahead until we find a non-space character.
            // if the character is a line terminator, we need to discard the blanks.
            if (ch == ' ') {
                int trailingSpaces = 1;
                // scan forward, counting the characters.
                while (off < endOffset && data[off] == ' ') {
                    off++;
                    trailingSpaces++;
                }
                // is this a lineend at the current location?
                if (off >= endOffset || data[off] == '\r' || data[off] == '\n') {
                    // the blanks are discarded; go on with the terminator.
                    continue;
                }
                // make sure we account for the spaces in the output count.
                decodedCount += trailingSpaces;
                // write out the blank characters we counted and continue with the non-blank.
                while (trailingSpaces-- > 0) {
                    out.write(' ');
                }
            }
            else if (ch == '=') {
                // we found an encoded character.  Reduce the 3 char sequence to one.
                // but first, make sure we have two characters to work with.
                if (off + 1 >= endOffset) {
                    throw new IOException(INVALID_ENCODING);
                }
                byte b1 = data[off++];
                byte b2 = data[off++];

                // we've found an encoded carriage return.  The next char needs to be a newline
                if (b1 == '\r') {
                    if (b2 != '\n') {
                        throw new IOException(INVALID_ENCODING);
                    }
                    // this was a soft linebreak inserted by the encoding.  We just toss this away
                    // on decode.
                }
                else {
                    // this is a hex pair we need to convert back to a single byte.
                    out.write((hexValue(b1) << 4) | hexValue(b2));
                    // 3 bytes in, one byte out
                    decodedCount++;
                }
            }
            else {
                // simple character, just write it out.
                out.write(ch);
                decodedCount++;
            }
        }

        return decodedCount;
    }

    /**
     * Decode a byte array of RFC 2047 "Q" encoded word data.
     *
     * @param data   The data array.
     * @param out    The output stream target for the decoded data.
     *
     * @return The number of bytes written to the stream.
     * @exception IOException
     */
    public int decodeWord(byte[] data, OutputStream out) throws IOException {
        return decodeWord(data, 0, data.length, out);
    }


    /**
     * Decode RFC 2047 "Q" encoded word data, writing it to the given output
     * stream.  Underscores are translated back to blanks.
     *
     * @param data   The array of byte data to decode.
     * @param off    Starting offset within the array.
     * @param length The length of data to decode.
     * @param out    The output stream used to return the decoded data.
     *
     * @return the number of bytes produced.
     * @exception IOException if an escape sequence is truncated or is not a
     *                        valid hex pair or soft line break.
     */
    public int decodeWord(byte[] data, int off, int length, OutputStream out) throws IOException {
        // make sure we're writing to the correct stream
        this.out = out;

        int endOffset = off + length;
        // local count; deliberately separate from the encoder's bytesWritten field.
        int decodedCount = 0;

        while (off < endOffset) {
            byte ch = data[off++];

            // space characters were translated to '_' on encode, so we need to translate them back.
            if (ch == '_') {
                out.write(' ');
                // the blank is a produced byte like any other.
                decodedCount++;
            }
            else if (ch == '=') {
                // we found an encoded character.  Reduce the 3 char sequence to one.
                // but first, make sure we have two characters to work with.
                if (off + 1 >= endOffset) {
                    throw new IOException(INVALID_ENCODING);
                }
                byte b1 = data[off++];
                byte b2 = data[off++];

                // we've found an encoded carriage return.  The next char needs to be a newline
                if (b1 == '\r') {
                    if (b2 != '\n') {
                        throw new IOException(INVALID_ENCODING);
                    }
                    // this was a soft linebreak inserted by the encoding.  We just toss this away
                    // on decode.
                }
                else {
                    // this is a hex pair we need to convert back to a single byte.
                    out.write((hexValue(b1) << 4) | hexValue(b2));
                    // 3 bytes in, one byte out
                    decodedCount++;
                }
            }
            else {
                // simple character, just write it out.
                out.write(ch);
                decodedCount++;
            }
        }

        return decodedCount;
    }


    /**
     * Decode quoted-printable String data, writing it to the given output stream.
     *
     * @param data   The String data to decode.
     * @param out    The output stream to write the decoded data to.
     *
     * @return the number of bytes produced.
     * @exception IOException
     */
    public int decode(String data, OutputStream out) throws IOException {
        try {
            // just get the byte data and decode.
            byte[] bytes = data.getBytes("US-ASCII");
            return decode(bytes, 0, bytes.length, out);
        } catch (UnsupportedEncodingException e) {
            // keep the original failure attached instead of discarding it.
            IOException failure = new IOException("US-ASCII encoding is not supported");
            failure.initCause(e);
            throw failure;
        }
    }

    /**
     * Convert one hex digit of an escape sequence to its numeric value.
     * Both uppercase and lowercase digits are accepted.
     *
     * @param digit  The character following the '=' marker.
     *
     * @return the value of the digit, 0 through 15.
     * @exception IOException if the character is not a hex digit.
     */
    private static int hexValue(int digit) throws IOException {
        if (digit >= '0' && digit <= '9') {
            return digit - '0';
        }
        if (digit >= 'A' && digit <= 'F') {
            return digit - 'A' + 10;
        }
        if (digit >= 'a' && digit <= 'f') {
            return digit - 'a' + 10;
        }
        throw new IOException(INVALID_ENCODING);
    }

    /**
     * Insert a soft line break if the current line cannot take the requested
     * number of characters.
     *
     * @param required The number of characters about to be written.
     *
     * @exception IOException
     */
    private void checkLineLength(int required) throws IOException {
        // the '=' of a soft line break counts against the line length too, so one
        // position must stay free for it; break as soon as the new characters would
        // fill the line completely.
        if ((lineCount + required) >= lineLength) {
            out.write('=');
            out.write('\r');
            out.write('\n');
            bytesWritten += 3;
            lineCount = 0;
        }
    }


    /**
     * Write a character in its escaped "=XX" form.
     *
     * @param ch     The character to write (only the low 8 bits are used).
     *
     * @exception IOException
     */
    public void writeEncodedCharacter(int ch) throws IOException {
        // keep the table lookups in range even for callers that pass a sign-extended byte.
        ch = ch & 0xFF;
        // we need 3 characters for an encoded value
        checkLineLength(3);
        out.write('=');
        out.write(encodingTable[ch >> 4]);
        out.write(encodingTable[ch & 0x0F]);
        lineCount += 3;
        bytesWritten += 3;
    }


    /**
     * Write a character unchanged.
     *
     * @param ch     The character to write.
     *
     * @exception IOException
     */
    public void writeCharacter(int ch) throws IOException {
        // a literal character takes a single position on the line.
        checkLineLength(1);
        out.write(ch);
        lineCount++;
        bytesWritten++;
    }


    /**
     * Write a hard line break (CRLF) and start a new line.
     *
     * @exception IOException
     */
    public void writeEOL() throws IOException {
        out.write('\r');
        out.write('\n');
        lineCount = 0;
        // CR and LF: two bytes.
        bytesWritten += 2;
    }


    /**
     * Decode the next byte of quoted-printable data from a stream.  Runs of
     * blanks are scanned ahead: they are dropped when a line end or end of
     * stream follows, otherwise they are handed back one per call before the
     * character that ended the run.
     *
     * @param in     The stream holding the encoded data.
     *
     * @return the decoded byte, or -1 at end of stream.
     * @exception IOException if an escape sequence is truncated or is not a
     *                        valid hex pair or soft line break.
     */
    public int decode(InputStream in) throws IOException {

        // we potentially need to scan over spans of whitespace characters to determine if they're real
        // we just return blanks until the count goes to zero.
        if (deferredWhitespace > 0) {
            deferredWhitespace--;
            return ' ';
        }

        // we may have needed to scan ahead to find the first non-blank character, which we would store
        // in cachedCharacter.  That character has not been decoded yet (it may be the '=' of an escape),
        // so it takes the place of a fresh read rather than being returned directly.
        int ch;
        if (cachedCharacter != -1) {
            ch = cachedCharacter;
            cachedCharacter = -1;
        }
        else {
            ch = in.read();
        }

        // reflect back an EOF condition.
        if (ch == -1) {
            return -1;
        }

        // space characters are a pain.  We need to scan ahead until we find a non-space character.
        // if the character is a line terminator, we need to discard the blanks.
        if (ch == ' ') {
            // scan forward, counting the characters.
            while ((ch = in.read()) == ' ') {
                deferredWhitespace++;
            }

            // is this a lineend at the current location?
            if (ch == -1 || ch == '\r' || ch == '\n') {
                // those blanks we so zealously counted up don't really exist.  Clear out the counter.
                deferredWhitespace = 0;
                // return the real significant character now.
                return ch;
            }
            // remember this character for later, after we've used up the deferred blanks.
            cachedCharacter = ch;
            // return this space.  We did not include this one in the deferred count, so we're right in sync.
            return ' ';
        }
        else if (ch == '=') {
            // we need to get two characters after the quotation marker
            int b1 = in.read();
            if (b1 == -1) {
                throw new IOException("Truncated quoted printable data");
            }
            int b2 = in.read();
            if (b2 == -1) {
                throw new IOException("Truncated quoted printable data");
            }

            // we've found an encoded carriage return.  The next char needs to be a newline
            if (b1 == '\r') {
                if (b2 != '\n') {
                    throw new IOException(INVALID_ENCODING);
                }
                // this was a soft linebreak inserted by the encoding.  We just toss this away
                // on decode.  We need to return something, so recurse and decode the next.
                return decode(in);
            }
            // this is a hex pair we need to convert back to a single byte.
            return (hexValue(b1) << 4) | hexValue(b2);
        }
        else {
            return ch;
        }
    }


    /**
     * Perform RFC-2047 word encoding using Q-P data encoding.
     *
     * @param in       The source for the encoded data.
     * @param charset  The charset tag to be added to each encoded data section.
     * @param specials The set of special characters that we require to encoded.
     * @param out      The output stream where the encoded data is to be written.
     * @param fold     Controls whether separate sections of encoded data are separated by
     *                 linebreaks or whitespace.
     *
     * @exception IOException
     */
    public void encodeWord(InputStream in, String charset, String specials, OutputStream out, boolean fold) throws IOException
    {
        // we need to scan ahead in a few places, which may require pushing characters back on to the stream.
        // make sure we have a stream where this is possible.
        PushbackInputStream inStream = new PushbackInputStream(in);
        PrintStream writer = new PrintStream(out);

        // an encoded word is limited to 75 characters in total.  The fixed parts
        // "=?", "?Q?" and "?=" take 7 of those and the charset name takes the rest
        // of the overhead.
        int limit = 75 - 7 - charset.length();
        boolean firstLine = true;
        StringBuffer encodedString = new StringBuffer(76);

        while (true) {

            // encode another segment of data.
            encode(inStream, encodedString, specials, limit);
            // nothing encoded means we've hit the end of the data.
            if (encodedString.length() == 0) {
                break;
            }
            // if we have more than one segment, we need to insert separators.  Depending on whether folding
            // was requested, this is either a blank or a linebreak.
            if (!firstLine) {
                if (fold) {
                    writer.print("\r\n");
                }
                else {
                    writer.print(" ");
                }
            }

            // add the encoded word header
            writer.print("=?");
            writer.print(charset);
            writer.print("?Q?");
            // the data
            writer.print(encodedString.toString());
            // and the terminator mark
            writer.print("?=");
            writer.flush();

            // we reset the string buffer and reuse it.
            encodedString.setLength(0);
            // every segment after this one needs a separator in front of it.
            firstLine = false;
        }
    }
}