/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package javax.mail.internet;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.StringTokenizer;

import javax.activation.DataHandler;
import javax.activation.DataSource;
import javax.mail.MessagingException;

import org.apache.geronimo.mail.util.ASCIIUtil;
import org.apache.geronimo.mail.util.Base64;
import org.apache.geronimo.mail.util.Base64DecoderStream;
import org.apache.geronimo.mail.util.Base64Encoder;
import org.apache.geronimo.mail.util.Base64EncoderStream;
import org.apache.geronimo.mail.util.QuotedPrintableDecoderStream;
import org.apache.geronimo.mail.util.QuotedPrintableEncoderStream;
import org.apache.geronimo.mail.util.QuotedPrintableEncoder;
import org.apache.geronimo.mail.util.QuotedPrintable;
import org.apache.geronimo.mail.util.SessionUtil;
import org.apache.geronimo.mail.util.UUDecoderStream;
import org.apache.geronimo.mail.util.UUEncoderStream;

/**
 * Static helpers for MIME transfer encodings, RFC 2047 header text
 * encoding/decoding, and MIME/Java character set name mapping.
 *
 * Supported transfer encodings are "base64", "quoted-printable", "7bit",
 * "8bit" and "binary". In addition, "uuencode" (also accepted as
 * "x-uuencode" and "x-uue") is supported.
 *
 * @version $Rev: 467553 $ $Date: 2006-10-25 06:01:51 +0200 (Mi, 25. Okt 2006) $
 */
public class MimeUtility {

    private static final String MIME_FOLDENCODEDWORDS = "mail.mime.foldencodedwords";
    private static final String MIME_DECODE_TEXT_STRICT = "mail.mime.decodetext.strict";

    private MimeUtility() {
    }

    public static final int ALL = -1;

    // characters that must be backslash-escaped inside a quoted string.
    private static final String escapedChars = "\"\\\r\n";
    // characters treated as linear white space when tokenizing header text.
    private static final String linearWhiteSpace = " \t\r\n";

    // characters handed to the quoted-printable word encoder as "specials"; the larger
    // set is used by encodeWord(), the smaller one by encodeText().
    private static final String QP_WORD_SPECIALS = "=_?\"#$%&'(),.:;<>@[\\]^`{|}~";
    private static final String QP_TEXT_SPECIALS = "=_?";

    // the javamail spec includes the ability to map java encoding names to MIME-specified names. Normally,
    // these values are loaded from a character mapping file. Keys are stored in lower case.
    private static final Map java2mime = new HashMap();
    private static final Map mime2java = new HashMap();

    static {
        // we need to load the mapping tables used by javaCharset() and mimeCharset().
        loadCharacterSetMappings();
    }

    /**
     * Wrap a decoder around a given input stream.
     *
     * @param in       The stream holding the encoded data.
     * @param encoding The name of the transfer encoding (case insensitive). A null
     *                 encoding is treated as "no encoding" and the stream is
     *                 returned unchanged, mirroring encode().
     *
     * @return A stream that delivers the decoded data.
     * @exception MessagingException if the encoding name is not recognized.
     */
    public static InputStream decode(InputStream in, String encoding) throws MessagingException {
        // no encoding specified, so assume the data is not encoded.
        if (encoding == null) {
            return in;
        }

        // encoding names are ASCII tokens; lower-case them independently of the default locale.
        encoding = encoding.toLowerCase(Locale.ENGLISH);

        // some encodings are just pass-throughs, with no real decoding.
        if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) {
            return in;
        }
        else if (encoding.equals("base64")) {
            return new Base64DecoderStream(in);
        }
        // UUEncode is known by a couple historical extension names too.
        else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) {
            return new UUDecoderStream(in);
        }
        else if (encoding.equals("quoted-printable")) {
            return new QuotedPrintableDecoderStream(in);
        }
        else {
            throw new MessagingException("Unknown encoding " + encoding);
        }
    }

    /**
     * Decode a string of text obtained from a mail header into
     * its proper form. The text generally will consist of a
     * string of white space delimited tokens, some of which may be
     * RFC 2047 encoded-words.
     *
     * White space that separates two adjacent encoded-words is dropped;
     * all other white space between tokens is preserved. Trailing white
     * space is discarded. If the "mail.mime.decodetext.strict" property is
     * set to false, encoded-words embedded inside larger tokens are decoded too.
     *
     * @param text The text to decode.
     *
     * @return The decoded text string.
     * @exception UnsupportedEncodingException
     */
    public static String decodeText(String text) throws UnsupportedEncodingException {
        // if the text contains any encoded tokens, those tokens will be marked with "=?". If the
        // source string doesn't contain that sequence, no decoding is required.
        if (text.indexOf("=?") < 0) {
            return text;
        }

        // we have two sets of rules we can apply.
        if (!SessionUtil.getBooleanProperty(MIME_DECODE_TEXT_STRICT, true)) {
            return decodeTextNonStrict(text);
        }

        int offset = 0;
        int endOffset = text.length();

        // bounds of the most recent white space run that has not been written out yet.
        int startWhiteSpace = -1;
        int endWhiteSpace = -1;

        StringBuffer decodedText = new StringBuffer(text.length());

        boolean previousTokenEncoded = false;

        while (offset < endOffset) {
            char ch = text.charAt(offset);

            // is this a whitespace character?
            if (linearWhiteSpace.indexOf(ch) != -1) {
                // remember the run; whether it gets emitted depends on the tokens around it.
                startWhiteSpace = offset;
                offset = skipWhiteSpace(text, offset);
                endWhiteSpace = offset;
                continue;
            }

            // we have a word token. We need to scan over the word and then try to parse it.
            int wordStart = offset;
            offset = skipWord(text, offset);
            String word = text.substring(wordStart, offset);

            // is the token encoded? decode the word
            if (word.startsWith("=?")) {
                try {
                    // if this gives a parsing failure, treat it like a non-encoded word.
                    String decodedWord = decodeWord(word);

                    // white space is only significant if the previous token was plain text.
                    if (!previousTokenEncoded) {
                        appendWhiteSpace(decodedText, text, startWhiteSpace, endWhiteSpace);
                    }
                    // either written or deliberately dropped; nothing is pending any more.
                    startWhiteSpace = -1;
                    previousTokenEncoded = true;
                    decodedText.append(decodedWord);
                    continue;
                } catch (ParseException e) {
                    // not a well-formed encoded-word; fall through and copy it as plain text.
                }
            }

            // this is a normal token, so it doesn't matter what the previous token was. Add the white space
            // if we have it.
            appendWhiteSpace(decodedText, text, startWhiteSpace, endWhiteSpace);
            startWhiteSpace = -1;
            previousTokenEncoded = false;
            decodedText.append(word);
        }

        return decodedText.toString();
    }


    /**
     * Decode a string of text obtained from a mail header into
     * its proper form. This is the non-strict variant for mailers that
     * violate the RFC 2047 restriction that encoded-words must be delimited
     * by linear white space. Each white space delimited token is scanned for
     * inner tokens enclosed in "=?" -- "?=" pairs, and each of those is
     * decoded individually. Anything that cannot be parsed as an
     * encoded-word is copied through unchanged.
     *
     * @param text The text to decode.
     *
     * @return The decoded text string.
     * @exception UnsupportedEncodingException
     */
    private static String decodeTextNonStrict(String text) throws UnsupportedEncodingException {
        int offset = 0;
        int endOffset = text.length();

        // bounds of the most recent white space run that has not been written out yet.
        int startWhiteSpace = -1;
        int endWhiteSpace = -1;

        StringBuffer decodedText = new StringBuffer(text.length());

        boolean previousTokenEncoded = false;

        while (offset < endOffset) {
            char ch = text.charAt(offset);

            // is this a whitespace character?
            if (linearWhiteSpace.indexOf(ch) != -1) {
                startWhiteSpace = offset;
                offset = skipWhiteSpace(text, offset);
                endWhiteSpace = offset;
                continue;
            }

            // we're at the start of a word token. We potentially need to break this up into subtokens
            int wordStart = offset;
            offset = skipWord(text, offset);
            String word = text.substring(wordStart, offset);

            int decodeStart = 0;

            // now scan and process each of the bits within here.
            while (decodeStart < word.length()) {
                int tokenStart = word.indexOf("=?", decodeStart);
                if (tokenStart == -1) {
                    // no more encoded sections; the rest of the word is plain text.
                    appendWhiteSpace(decodedText, text, startWhiteSpace, endWhiteSpace);
                    startWhiteSpace = -1;
                    previousTokenEncoded = false;
                    decodedText.append(word.substring(decodeStart));
                    break;
                }

                // we might have a normal token preceding the encoded section.
                if (tokenStart != decodeStart) {
                    appendWhiteSpace(decodedText, text, startWhiteSpace, endWhiteSpace);
                    startWhiteSpace = -1;
                    previousTokenEncoded = false;
                    decodedText.append(word.substring(decodeStart, tokenStart));
                }

                // now find the end marker (index just past the closing "?=").
                int tokenEnd = findEncodedWordEnd(word, tokenStart);
                // sigh, an invalid token. Treat the remainder as plain text.
                if (tokenEnd == -1) {
                    appendWhiteSpace(decodedText, text, startWhiteSpace, endWhiteSpace);
                    startWhiteSpace = -1;
                    previousTokenEncoded = false;
                    decodedText.append(word.substring(tokenStart));
                    break;
                }

                // update our ticker
                decodeStart = tokenEnd;

                // the token must include the "?=" terminator, decodeWord() requires it.
                String token = word.substring(tokenStart, tokenEnd);
                try {
                    // if this gives a parsing failure, treat it like a non-encoded word.
                    String decodedWord = decodeWord(token);

                    // white space is only significant if the previous token was plain text.
                    if (!previousTokenEncoded) {
                        appendWhiteSpace(decodedText, text, startWhiteSpace, endWhiteSpace);
                    }
                    // white space between adjacent encoded-words is dropped, not deferred.
                    startWhiteSpace = -1;
                    previousTokenEncoded = true;
                    decodedText.append(decodedWord);
                    continue;
                } catch (ParseException e) {
                    // not a well-formed encoded-word; copy it through as plain text below.
                }

                appendWhiteSpace(decodedText, text, startWhiteSpace, endWhiteSpace);
                startWhiteSpace = -1;
                previousTokenEncoded = false;
                decodedText.append(token);
            }
        }

        return decodedText.toString();
    }

    /**
     * Find the first position at or after offset that is not linear white space.
     *
     * @return The index of the first non white space character, or text.length().
     */
    private static int skipWhiteSpace(String text, int offset) {
        int length = text.length();
        while (offset < length && linearWhiteSpace.indexOf(text.charAt(offset)) != -1) {
            offset++;
        }
        return offset;
    }

    /**
     * Find the first position at or after offset that is linear white space.
     *
     * @return The index of the first white space character, or text.length().
     */
    private static int skipWord(String text, int offset) {
        int length = text.length();
        while (offset < length && linearWhiteSpace.indexOf(text.charAt(offset)) == -1) {
            offset++;
        }
        return offset;
    }

    /**
     * Append the pending white space run text[start, end) to the target, if there is one.
     *
     * @param start Start of the run, or -1 if no white space is pending.
     */
    private static void appendWhiteSpace(StringBuffer target, String text, int start, int end) {
        if (start != -1) {
            target.append(text.substring(start, end));
        }
    }

    /**
     * Locate the end of the encoded-word candidate that starts with "=?" at tokenStart.
     * The charset and encoding fields are stepped over first so that a "?=" formed by the
     * '?' delimiter and a leading '=' of Q-encoded text is not mistaken for the terminator.
     *
     * @return The index just past the closing "?=", or -1 if the candidate is incomplete.
     */
    private static int findEncodedWordEnd(String word, int tokenStart) {
        int charsetEnd = word.indexOf('?', tokenStart + 2);
        if (charsetEnd == -1) {
            return -1;
        }
        int encodingEnd = word.indexOf('?', charsetEnd + 1);
        if (encodingEnd == -1) {
            return -1;
        }
        int terminator = word.indexOf("?=", encodingEnd + 1);
        return terminator == -1 ? -1 : terminator + 2;
    }

    /**
     * Parse a string using the RFC 2047 rules for an "encoded-word"
     * type. This encoding has the syntax:
     *
     * encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
     *
     * The encoding field ("B" or "Q") is matched case-insensitively. Any
     * characters following the "?=" terminator are ignored.
     *
     * @param word The possibly encoded word value.
     *
     * @return The decoded word.
     * @exception ParseException if the word does not have the encoded-word structure.
     * @exception UnsupportedEncodingException if the encoding or charset is unknown,
     *            or the encoded text cannot be decoded.
     */
    public static String decodeWord(String word) throws ParseException, UnsupportedEncodingException {
        // encoded words start with the characters "=?". If this not an encoded word, we throw a
        // ParseException for the caller.
        if (!word.startsWith("=?")) {
            throw new ParseException("Invalid RFC 2047 encoded-word: " + word);
        }

        int charsetPos = word.indexOf('?', 2);
        if (charsetPos == -1) {
            throw new ParseException("Missing charset in RFC 2047 encoded-word: " + word);
        }

        // pull out the character set information (this is the MIME name at this point).
        String charset = word.substring(2, charsetPos).toLowerCase(Locale.ENGLISH);

        // now pull out the encoding token the same way.
        int encodingPos = word.indexOf('?', charsetPos + 1);
        if (encodingPos == -1) {
            throw new ParseException("Missing encoding in RFC 2047 encoded-word: " + word);
        }

        String encoding = word.substring(charsetPos + 1, encodingPos);

        // and finally the encoded text.
        int encodedTextPos = word.indexOf("?=", encodingPos + 1);
        if (encodedTextPos == -1) {
            throw new ParseException("Missing encoded text in RFC 2047 encoded-word: " + word);
        }

        String encodedText = word.substring(encodingPos + 1, encodedTextPos);

        // seems a bit silly to encode a null string, but easy to deal with.
        if (encodedText.length() == 0) {
            return "";
        }

        try {
            // the decoder writes directly to an output stream.
            ByteArrayOutputStream out = new ByteArrayOutputStream(encodedText.length());

            byte[] encodedData = encodedText.getBytes("US-ASCII");

            // Base64 encoded?
            if (encoding.equalsIgnoreCase("B")) {
                Base64.decode(encodedData, out);
            }
            // maybe quoted printable.
            else if (encoding.equalsIgnoreCase("Q")) {
                QuotedPrintableEncoder dataEncoder = new QuotedPrintableEncoder();
                dataEncoder.decodeWord(encodedData, out);
            }
            else {
                throw new UnsupportedEncodingException("Unknown RFC 2047 encoding: " + encoding);
            }
            // get the decoded byte data and convert into a string.
            byte[] decodedData = out.toByteArray();
            return new String(decodedData, javaCharset(charset));
        } catch (UnsupportedEncodingException e) {
            // already the right type with a specific message (unknown encoding or charset);
            // don't let the generic IOException handler below replace it.
            throw e;
        } catch (IOException e) {
            UnsupportedEncodingException failure = new UnsupportedEncodingException("Invalid RFC 2047 encoding");
            failure.initCause(e);
            throw failure;
        }
    }

    /**
     * Wrap an encoder around a given output stream.
     *
     * @param out      The output stream to wrap.
     * @param encoding The name of the encoding (case insensitive). A null
     *                 encoding returns the stream unchanged.
     *
     * @return A stream that performs on the fly encoding for the
     *         requested encoding type.
     * @exception MessagingException if the encoding name is not recognized.
     */
    public static OutputStream encode(OutputStream out, String encoding) throws MessagingException {
        // no encoding specified, so assume it goes out unchanged.
        if (encoding == null) {
            return out;
        }

        encoding = encoding.toLowerCase(Locale.ENGLISH);

        // some encodings are just pass-throughs, with no real encoding.
        if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) {
            return out;
        }
        else if (encoding.equals("base64")) {
            return new Base64EncoderStream(out);
        }
        // UUEncode is known by a couple historical extension names too.
        else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) {
            return new UUEncoderStream(out);
        }
        else if (encoding.equals("quoted-printable")) {
            return new QuotedPrintableEncoderStream(out);
        }
        else {
            throw new MessagingException("Unknown encoding " + encoding);
        }
    }

    /**
     * Wrap an encoder around a given output stream.
     *
     * @param out      The output stream to wrap.
     * @param encoding The name of the encoding (case insensitive). A null
     *                 encoding returns the stream unchanged.
     * @param filename The filename of the data being sent (only used for UUEncode).
     *
     * @return A stream that performs on the fly encoding for the
     *         requested encoding type.
     * @exception MessagingException if the encoding name is not recognized.
     */
    public static OutputStream encode(OutputStream out, String encoding, String filename) throws MessagingException {
        // no encoding specified, so assume it goes out unchanged (same rule as the two-argument form).
        if (encoding == null) {
            return out;
        }

        encoding = encoding.toLowerCase(Locale.ENGLISH);

        // some encodings are just pass-throughs, with no real encoding.
        if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) {
            return out;
        }
        else if (encoding.equals("base64")) {
            return new Base64EncoderStream(out);
        }
        // UUEncode is known by a couple historical extension names too.
        else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) {
            return new UUEncoderStream(out, filename);
        }
        else if (encoding.equals("quoted-printable")) {
            return new QuotedPrintableEncoderStream(out);
        }
        else {
            throw new MessagingException("Unknown encoding " + encoding);
        }
    }


    /**
     * Encode header text using the default charset and an automatically chosen encoding.
     *
     * @param word The text to encode.
     *
     * @return The encoded text, or the original string if it is all 7bit.
     * @exception UnsupportedEncodingException
     */
    public static String encodeText(String word) throws UnsupportedEncodingException {
        return encodeText(word, null, null);
    }

    /**
     * Encode header text.
     *
     * @param word     The text to encode.
     * @param charset  The MIME charset name, or null for the default.
     * @param encoding "B", "Q" (case insensitive), or null to choose automatically.
     *
     * @return The encoded text, or the original string if it is all 7bit.
     * @exception UnsupportedEncodingException
     */
    public static String encodeText(String word, String charset, String encoding) throws UnsupportedEncodingException {
        return encodeWord(word, charset, encoding, false);
    }

    /**
     * Encode a header word using the default charset and an automatically chosen encoding.
     *
     * @param word The word to encode.
     *
     * @return The encoded word, or the original string if it is all 7bit.
     * @exception UnsupportedEncodingException
     */
    public static String encodeWord(String word) throws UnsupportedEncodingException {
        return encodeWord(word, null, null);
    }

    /**
     * Encode a header word.
     *
     * @param word     The word to encode.
     * @param charset  The MIME charset name, or null for the default.
     * @param encoding "B", "Q" (case insensitive), or null to choose automatically.
     *
     * @return The encoded word, or the original string if it is all 7bit.
     * @exception UnsupportedEncodingException
     */
    public static String encodeWord(String word, String charset, String encoding) throws UnsupportedEncodingException {
        return encodeWord(word, charset, encoding, true);
    }


    /**
     * Shared implementation behind encodeText() and encodeWord().
     *
     * @param word         The string to encode.
     * @param charset      The MIME charset name, or null for the default.
     * @param encoding     "B", "Q" (case insensitive), or null to use the encoding
     *                     suggested by ASCIIUtil.getTextTransferEncoding().
     * @param encodingWord true selects the larger "word" set of quoted-printable
     *                     specials, false the smaller "text" set.
     *
     * @return The encoded string, or the original string if it is all 7bit.
     * @exception UnsupportedEncodingException if the encoding or charset is unknown,
     *            or the encoder fails.
     */
    private static String encodeWord(String word, String charset, String encoding, boolean encodingWord) throws UnsupportedEncodingException {

        // figure out what we need to encode this.
        String encoder = ASCIIUtil.getTextTransferEncoding(word);
        // all ascii? We can return this directly,
        if (encoder.equals("7bit")) {
            return word;
        }

        // if not given a charset, use the default.
        if (charset == null) {
            charset = getDefaultMIMECharset();
        }

        // sort out the encoder. If not explicitly given, use the best guess we've already established.
        if (encoding != null) {
            if (encoding.equalsIgnoreCase("B")) {
                encoder = "base64";
            }
            else if (encoding.equalsIgnoreCase("Q")) {
                encoder = "quoted-printable";
            }
            else {
                throw new UnsupportedEncodingException("Unknown transfer encoding: " + encoding);
            }
        }

        boolean foldEncodedWords = SessionUtil.getBooleanProperty(MIME_FOLDENCODEDWORDS, false);

        try {
            // get the string bytes in the correct source charset
            InputStream in = new ByteArrayInputStream(word.getBytes(javaCharset(charset)));
            ByteArrayOutputStream out = new ByteArrayOutputStream();

            if (encoder.equals("base64")) {
                Base64Encoder dataEncoder = new Base64Encoder();
                dataEncoder.encodeWord(in, charset, out, foldEncodedWords);
            }
            else {
                QuotedPrintableEncoder dataEncoder = new QuotedPrintableEncoder();
                dataEncoder.encodeWord(in, charset, encodingWord ? QP_WORD_SPECIALS : QP_TEXT_SPECIALS, out, foldEncodedWords);
            }

            // encoded words are pure ASCII; don't depend on the platform default charset.
            return new String(out.toByteArray(), "US-ASCII");
        } catch (UnsupportedEncodingException e) {
            // keep the specific message (typically an unknown charset name).
            throw e;
        } catch (IOException e) {
            UnsupportedEncodingException failure = new UnsupportedEncodingException("Invalid encoding");
            failure.initCause(e);
            throw failure;
        }
    }


    /**
     * Examine the content of a data handler and decide what type
     * of transfer encoding should be used. For text streams,
     * we'll decide between 7bit, quoted-printable, and base64.
     * For binary content types, we'll use either 7bit or base64.
     *
     * @param handler The DataHandler associated with the content.
     *
     * @return The string name of an encoding used to transfer the content.
     *         "base64" is returned if the content cannot be examined.
     */
    public static String getEncoding(DataHandler handler) {
        // if this handler has an associated data source, we can read directly from the
        // data source to make this judgment. This is generally MUCH faster than asking the
        // DataHandler to write out the data for us.
        DataSource ds = handler.getDataSource();
        if (ds != null) {
            return getEncoding(ds);
        }

        try {
            // there is no data source on this path, so the content type must come from the handler.
            ContentType content = new ContentType(handler.getContentType());

            // The only access to the content bytes at this point is by asking the handler to write
            // the information out to a stream. We're going to pipe this through a special stream
            // that examines the bytes as they go by.
            ContentCheckingOutputStream checker = new ContentCheckingOutputStream();

            handler.writeTo(checker);

            // figure this out based on whether we believe this to be a text type or not.
            if (content.match("text/*")) {
                return checker.getTextTransferEncoding();
            }
            else {
                return checker.getBinaryTransferEncoding();
            }
        } catch (Exception e) {
            // any unexpected I/O or parsing exceptions we'll force to a "safe" fallback position.
            return "base64";
        }
    }


    /**
     * Determine what transfer encoding should be used for
     * data retrieved from a DataSource.
     *
     * @param source The DataSource for the transmitted data.
     *
     * @return The string name of the encoding form that should be used for
     *         the data. "base64" is returned if the data cannot be examined.
     */
    public static String getEncoding(DataSource source) {
        InputStream in = null;

        try {
            // get a parser that allows us to make comparisons.
            ContentType content = new ContentType(source.getContentType());

            // we're probably going to have to scan the data.
            in = source.getInputStream();

            if (!content.match("text/*")) {
                // Not purporting to be a text type? Examine the content to see we might be able to
                // at least pretend it is an ascii type.
                return ASCIIUtil.getBinaryTransferEncoding(in);
            }
            else {
                return ASCIIUtil.getTextTransferEncoding(in);
            }
        } catch (Exception e) {
            // this was a problem...not sure what makes sense here, so we'll assume it's binary
            // and we need to transfer this using Base64 encoding.
            return "base64";
        } finally {
            // make sure we close the stream
            try {
                if (in != null) {
                    in.close();
                }
            } catch (IOException ignored) {
                // the answer has already been determined; a close failure can't change it.
            }
        }
    }


    /**
     * Quote a "word" value. If the word contains a character that needs
     * escaping, a control or non-ASCII character, or any character from
     * the specified "specials" list, this value is returned as a
     * quoted string. Otherwise, it is returned unchanged (an "atom").
     *
     * @param word     The word requiring quoting.
     * @param specials The set of special characters that can't appear in an unquoted
     *                 string.
     *
     * @return The quoted value. This will be unchanged if the word doesn't contain
     *         any of the problem characters.
     */
    public static String quote(String word, String specials) {
        int wordLength = word.length();
        // scan the string looking for problem characters
        for (int i = 0; i < wordLength; i++) {
            char ch = word.charAt(i);
            // escaped characters, control/non-ascii characters and the designated specials all
            // force quoting; quoteAndEscapeString() takes care of any escaping required.
            if (escapedChars.indexOf(ch) >= 0 || ch < 32 || ch >= 127 || specials.indexOf(ch) >= 0) {
                return quoteAndEscapeString(word);
            }
        }
        return word;
    }

    /**
     * Take a string and return it as a formatted quoted string, with
     * all characters requiring escaping preceded by a backslash.
     *
     * @param word The string to quote.
     *
     * @return The quoted string.
     */
    private static String quoteAndEscapeString(String word) {
        int wordLength = word.length();
        // allocate at least enough for the string and two quotes plus a reasonable number of escaped chars.
        StringBuffer buffer = new StringBuffer(wordLength + 10);
        // add the leading quote.
        buffer.append('"');

        for (int i = 0; i < wordLength; i++) {
            char ch = word.charAt(i);
            // is this an escaped char?
            if (escapedChars.indexOf(ch) >= 0) {
                // add the escape marker before appending.
                buffer.append('\\');
            }
            buffer.append(ch);
        }
        // now the closing quote
        buffer.append('"');
        return buffer.toString();
    }

    /**
     * Translate a MIME standard character set name into the Java
     * equivalent.
     *
     * @param charset The MIME standard name.
     *
     * @return The Java equivalent for this name, the original name if
     *         there is no mapping, or null if charset is null.
     */
    public static String javaCharset(String charset) {
        // nothing in, nothing out.
        if (charset == null) {
            return null;
        }

        String mappedCharset = (String)mime2java.get(charset.toLowerCase(Locale.ENGLISH));
        // if there is no mapping, then the original name is used. Many of the MIME character set
        // names map directly back into Java. The reverse isn't necessarily true.
        return mappedCharset == null ? charset : mappedCharset;
    }

    /**
     * Map a Java character set name into the MIME equivalent.
     *
     * @param charset The java character set name.
     *
     * @return The MIME standard equivalent for this character set name, the
     *         original name if there is no mapping, or null if charset is null.
     */
    public static String mimeCharset(String charset) {
        // nothing in, nothing out.
        if (charset == null) {
            return null;
        }

        String mappedCharset = (String)java2mime.get(charset.toLowerCase(Locale.ENGLISH));
        // if there is no mapping, then the original name is used.
        return mappedCharset == null ? charset : mappedCharset;
    }


    /**
     * Get the default character set to use, in Java name format.
     * This will either be the value set with the mail.mime.charset
     * property (mapped to its Java name) or the value of the
     * file.encoding property. If neither of these is set, we fall
     * back to 8859_1.
     *
     * @return The character string value of the default character set.
     */
    public static String getDefaultJavaCharset() {
        String charset = SessionUtil.getProperty("mail.mime.charset");
        if (charset != null) {
            return javaCharset(charset);
        }
        return SessionUtil.getProperty("file.encoding", "8859_1");
    }

    /**
     * Get the default character set to use, in MIME name format.
     * This will either be the value set with the mail.mime.charset
     * property or the value of the file.encoding property mapped to
     * its MIME name. If neither of these is set, we fall back to
     * the MIME name for 8859_1.
     *
     * @return The character string value of the default character set.
     */
    static String getDefaultMIMECharset() {
        // if the property is specified, this can be used directly.
        String charset = SessionUtil.getProperty("mail.mime.charset");
        if (charset != null) {
            return charset;
        }

        // get the Java-defined default and map back to a MIME name.
        return mimeCharset(SessionUtil.getProperty("file.encoding", "8859_1"));
    }


    /**
     * Load the default mapping tables used by the javaCharset()
     * and mimeCharset() methods. By default, these tables are
     * loaded from the /META-INF/javamail.charset.map file. If
     * that leaves either table empty, it is populated with a
     * built-in default mapping.
     */
    static private void loadCharacterSetMappings() {
        // normally, these come from a character map file contained in the jar file.
        InputStream map = null;
        try {
            map = javax.mail.internet.MimeUtility.class.getResourceAsStream("/META-INF/javamail.charset.map");

            if (map != null) {
                // get a reader for this so we can load. Use a fixed charset rather than the
                // platform default so the file is read the same way everywhere.
                BufferedReader reader = new BufferedReader(new InputStreamReader(map, "ISO-8859-1"));

                readMappings(reader, java2mime);
                readMappings(reader, mime2java);
            }
        } catch (Exception e) {
            // deliberately ignored: the built-in defaults below cover a missing or unreadable file.
        } finally {
            if (map != null) {
                try {
                    map.close();
                } catch (IOException ignored) {
                    // nothing useful can be done about a close failure here.
                }
            }
        }

        // if any sort of error occurred reading the preferred file version, we could end up with empty
        // mapping tables. This could cause all sorts of difficulty, so ensure they are populated with at
        // least a reasonable set of defaults.
        if (java2mime.isEmpty()) {
            // ISO-8859-1 through ISO-8859-9, each known by three Java spellings.
            for (int part = 1; part <= 9; part++) {
                String mimeName = "ISO-8859-" + part;
                java2mime.put("8859_" + part, mimeName);
                java2mime.put("iso8859_" + part, mimeName);
                java2mime.put("iso8859-" + part, mimeName);
            }

            java2mime.put("sjis", "Shift_JIS");
            java2mime.put("jis", "ISO-2022-JP");
            java2mime.put("iso2022jp", "ISO-2022-JP");
            java2mime.put("euc_jp", "euc-jp");
            java2mime.put("koi8_r", "koi8-r");
            java2mime.put("euc_cn", "euc-cn");
            java2mime.put("euc_tw", "euc-tw");
            java2mime.put("euc_kr", "euc-kr");
        }

        if (mime2java.isEmpty()) {
            mime2java.put("iso-2022-cn", "ISO2022CN");
            mime2java.put("iso-2022-kr", "ISO2022KR");
            mime2java.put("utf-8", "UTF8");
            mime2java.put("utf8", "UTF8");
            mime2java.put("ja_jp.iso2022-7", "ISO2022JP");
            mime2java.put("ja_jp.eucjp", "EUCJIS");
            mime2java.put("euc-kr", "KSC5601");
            mime2java.put("euckr", "KSC5601");
            mime2java.put("us-ascii", "ISO-8859-1");
            mime2java.put("x-us-ascii", "ISO-8859-1");
        }
    }


    /**
     * Read a section of a character map table and populate the
     * target mapping table with the information. The table end
     * is marked by a line starting with "--" and also ending with
     * "--". Blank lines and comment lines (beginning with '#') are
     * ignored, as are lines with fewer than two tokens. The first
     * token is lower-cased and used as the key.
     *
     * @param reader The source of the file information.
     * @param table  The mapping table used to store the information.
     * @exception IOException if the reader fails.
     */
    static private void readMappings(BufferedReader reader, Map table) throws IOException {
        // process lines to the EOF or the end of table marker.
        while (true) {
            String line = reader.readLine();
            // no line returned is an EOF
            if (line == null) {
                return;
            }

            // trim so we're not messed up by trailing blanks
            line = line.trim();

            if (line.length() == 0 || line.startsWith("#")) {
                continue;
            }

            // stop processing if this is the end-of-table marker.
            if (line.startsWith("--") && line.endsWith("--")) {
                return;
            }

            // we allow either blanks or tabs as token delimiters.
            StringTokenizer tokenizer = new StringTokenizer(line, " \t");

            try {
                String from = tokenizer.nextToken().toLowerCase(Locale.ENGLISH);
                String to = tokenizer.nextToken();

                table.put(from, to);
            } catch (NoSuchElementException e) {
                // just ignore the line if invalid.
            }
        }
    }
}


/**
 * Utility class for examining content information written out
 * by a DataHandler object. This stream discards the data and only
 * gathers statistics on it so transfer encoding determinations
 * can be made afterwards.
 */
class ContentCheckingOutputStream extends OutputStream {
    private int asciiChars = 0;
    private int nonAsciiChars = 0;
    private boolean containsLongLines = false;
    private boolean containsMalformedEOL = false;
    private int previousChar = 0;
    // number of characters seen since the last line break.
    private int span = 0;

    ContentCheckingOutputStream() {
    }

    public void write(byte[] data) throws IOException {
        write(data, 0, data.length);
    }

    public void write(byte[] data, int offset, int length) throws IOException {
        for (int i = 0; i < length; i++) {
            // bytes are signed; mask so values 0x80-0xFF arrive as 128-255 rather than negatives.
            write(data[offset + i] & 0xff);
        }
    }

    public void write(int ch) {
        // per the OutputStream contract only the low-order eight bits are significant.
        ch &= 0xff;

        // we found a linebreak. Reset the line length counters on either one. We don't
        // really need to validate here.
        if (ch == '\n' || ch == '\r') {
            // a newline is only well-formed if the previous char was the '\r'
            if (ch == '\n' && previousChar != '\r') {
                // malformed linebreak; this forces binary content to base64 encoding.
                containsMalformedEOL = true;
            }
            // hit a line end, reset our line length counter
            span = 0;
        }
        else {
            span++;
            // the text has long lines, we can't transfer this as unencoded text.
            if (span > 998) {
                containsLongLines = true;
            }

            // non-ascii character, we have to transfer this in an encoded form.
            if (!ASCIIUtil.isAscii(ch)) {
                nonAsciiChars++;
            }
            else {
                asciiChars++;
            }
        }
        previousChar = ch;
    }


    /**
     * @return "7bit" if the data seen so far is all ASCII with short, well-formed
     *         lines; otherwise "base64".
     */
    public String getBinaryTransferEncoding() {
        if (nonAsciiChars != 0 || containsLongLines || containsMalformedEOL) {
            return "base64";
        }
        else {
            return "7bit";
        }
    }

    /**
     * @return "7bit", "quoted-printable" or "base64", depending on the mix of
     *         characters and line lengths seen so far.
     */
    public String getTextTransferEncoding() {
        // looking good so far, only valid chars here.
        if (nonAsciiChars == 0) {
            // does this contain long text lines? We need to use a Q-P encoding which will
            // be only slightly longer, but handles folding the longer lines.
            if (containsLongLines) {
                return "quoted-printable";
            }
            else {
                // ideal! Easiest one to handle.
                return "7bit";
            }
        }
        else {
            // mostly characters requiring encoding? Base64 is our best bet.
            if (nonAsciiChars > asciiChars) {
                return "base64";
            }
            else {
                // Q-P encoding will use fewer bytes than the full Base64.
                return "quoted-printable";
            }
        }
    }
}