001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied. See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019
020 package javax.mail.internet;
021
022 import java.io.BufferedInputStream;
023 import java.io.BufferedReader;
024 import java.io.ByteArrayInputStream;
025 import java.io.ByteArrayOutputStream;
026 import java.io.IOException;
027 import java.io.InputStream;
028 import java.io.InputStreamReader;
029 import java.io.OutputStream;
030 import java.io.UnsupportedEncodingException;
031 import java.util.HashMap;
032 import java.util.Map;
033 import java.util.NoSuchElementException;
034 import java.util.StringTokenizer;
035
036 import javax.activation.DataHandler;
037 import javax.activation.DataSource;
038 import javax.mail.MessagingException;
039
040 import org.apache.geronimo.mail.util.ASCIIUtil;
041 import org.apache.geronimo.mail.util.Base64;
042 import org.apache.geronimo.mail.util.Base64DecoderStream;
043 import org.apache.geronimo.mail.util.Base64Encoder;
044 import org.apache.geronimo.mail.util.Base64EncoderStream;
045 import org.apache.geronimo.mail.util.QuotedPrintableDecoderStream;
046 import org.apache.geronimo.mail.util.QuotedPrintableEncoderStream;
047 import org.apache.geronimo.mail.util.QuotedPrintableEncoder;
048 import org.apache.geronimo.mail.util.QuotedPrintable;
049 import org.apache.geronimo.mail.util.SessionUtil;
050 import org.apache.geronimo.mail.util.UUDecoderStream;
051 import org.apache.geronimo.mail.util.UUEncoderStream;
052
// Supported transfer encodings include "base64", "quoted-printable", "7bit", "8bit" and "binary".
// In addition, "uuencode" (also accepted as "x-uuencode" and "x-uue") is supported.
055
056 /**
057 * @version $Rev: 467553 $ $Date: 2006-10-25 00:01:51 -0400 (Wed, 25 Oct 2006) $
058 */
059 public class MimeUtility {
060
061 private static final String MIME_FOLDENCODEDWORDS = "mail.mime.foldencodedwords";
062 private static final String MIME_DECODE_TEXT_STRICT = "mail.mime.decodetext.strict";
063 private static final String MIME_FOLDTEXT = "mail.mime.foldtext";
064 private static final int FOLD_THRESHOLD = 76;
065
066 private MimeUtility() {
067 }
068
069 public static final int ALL = -1;
070
071 private static String defaultJavaCharset;
072 private static String escapedChars = "\"\\\r\n";
073 private static String linearWhiteSpace = " \t\r\n";
074
075 private static String QP_WORD_SPECIALS = "=_?\"#$%&'(),.:;<>@[\\]^`{|}~";
076 private static String QP_TEXT_SPECIALS = "=_?";
077
078 // the javamail spec includes the ability to map java encoding names to MIME-specified names. Normally,
079 // these values are loaded from a character mapping file.
080 private static Map java2mime;
081 private static Map mime2java;
082
083 static {
084 // we need to load the mapping tables used by javaCharset() and mimeCharset().
085 loadCharacterSetMappings();
086 }
087
088 public static InputStream decode(InputStream in, String encoding) throws MessagingException {
089 encoding = encoding.toLowerCase();
090
091 // some encodies are just pass-throughs, with no real decoding.
092 if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) {
093 return in;
094 }
095 else if (encoding.equals("base64")) {
096 return new Base64DecoderStream(in);
097 }
098 // UUEncode is known by a couple historical extension names too.
099 else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) {
100 return new UUDecoderStream(in);
101 }
102 else if (encoding.equals("quoted-printable")) {
103 return new QuotedPrintableDecoderStream(in);
104 }
105 else {
106 throw new MessagingException("Unknown encoding " + encoding);
107 }
108 }
109
110 /**
111 * Decode a string of text obtained from a mail header into
112 * it's proper form. The text generally will consist of a
113 * string of tokens, some of which may be encoded using
114 * base64 encoding.
115 *
116 * @param text The text to decode.
117 *
118 * @return The decoded test string.
119 * @exception UnsupportedEncodingException
120 */
121 public static String decodeText(String text) throws UnsupportedEncodingException {
122 // if the text contains any encoded tokens, those tokens will be marked with "=?". If the
123 // source string doesn't contain that sequent, no decoding is required.
124 if (text.indexOf("=?") < 0) {
125 return text;
126 }
127
128 // we have two sets of rules we can apply.
129 if (!SessionUtil.getBooleanProperty(MIME_DECODE_TEXT_STRICT, true)) {
130 return decodeTextNonStrict(text);
131 }
132
133 int offset = 0;
134 int endOffset = text.length();
135
136 int startWhiteSpace = -1;
137 int endWhiteSpace = -1;
138
139 StringBuffer decodedText = new StringBuffer(text.length());
140
141 boolean previousTokenEncoded = false;
142
143 while (offset < endOffset) {
144 char ch = text.charAt(offset);
145
146 // is this a whitespace character?
147 if (linearWhiteSpace.indexOf(ch) != -1) {
148 startWhiteSpace = offset;
149 while (offset < endOffset) {
150 // step over the white space characters.
151 ch = text.charAt(offset);
152 if (linearWhiteSpace.indexOf(ch) != -1) {
153 offset++;
154 }
155 else {
156 // record the location of the first non lwsp and drop down to process the
157 // token characters.
158 endWhiteSpace = offset;
159 break;
160 }
161 }
162 }
163 else {
164 // we have a word token. We need to scan over the word and then try to parse it.
165 int wordStart = offset;
166
167 while (offset < endOffset) {
168 // step over the white space characters.
169 ch = text.charAt(offset);
170 if (linearWhiteSpace.indexOf(ch) == -1) {
171 offset++;
172 }
173 else {
174 break;
175 }
176
177 //NB: Trailing whitespace on these header strings will just be discarded.
178 }
179 // pull out the word token.
180 String word = text.substring(wordStart, offset);
181 // is the token encoded? decode the word
182 if (word.startsWith("=?")) {
183 try {
184 // if this gives a parsing failure, treat it like a non-encoded word.
185 String decodedWord = decodeWord(word);
186
187 // are any whitespace characters significant? Append 'em if we've got 'em.
188 if (!previousTokenEncoded) {
189 if (startWhiteSpace != -1) {
190 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
191 startWhiteSpace = -1;
192 }
193 }
194 // this is definitely a decoded token.
195 previousTokenEncoded = true;
196 // and add this to the text.
197 decodedText.append(decodedWord);
198 // we continue parsing from here...we allow parsing errors to fall through
199 // and get handled as normal text.
200 continue;
201
202 } catch (ParseException e) {
203 }
204 }
205 // this is a normal token, so it doesn't matter what the previous token was. Add the white space
206 // if we have it.
207 if (startWhiteSpace != -1) {
208 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
209 startWhiteSpace = -1;
210 }
211 // this is not a decoded token.
212 previousTokenEncoded = false;
213 decodedText.append(word);
214 }
215 }
216
217 return decodedText.toString();
218 }
219
220
221 /**
222 * Decode a string of text obtained from a mail header into
223 * it's proper form. The text generally will consist of a
224 * string of tokens, some of which may be encoded using
225 * base64 encoding. This is for non-strict decoded for mailers that
226 * violate the RFC 2047 restriction that decoded tokens must be delimited
227 * by linear white space. This will scan tokens looking for inner tokens
228 * enclosed in "=?" -- "?=" pairs.
229 *
230 * @param text The text to decode.
231 *
232 * @return The decoded test string.
233 * @exception UnsupportedEncodingException
234 */
235 private static String decodeTextNonStrict(String text) throws UnsupportedEncodingException {
236 int offset = 0;
237 int endOffset = text.length();
238
239 int startWhiteSpace = -1;
240 int endWhiteSpace = -1;
241
242 StringBuffer decodedText = new StringBuffer(text.length());
243
244 boolean previousTokenEncoded = false;
245
246 while (offset < endOffset) {
247 char ch = text.charAt(offset);
248
249 // is this a whitespace character?
250 if (linearWhiteSpace.indexOf(ch) != -1) {
251 startWhiteSpace = offset;
252 while (offset < endOffset) {
253 // step over the white space characters.
254 ch = text.charAt(offset);
255 if (linearWhiteSpace.indexOf(ch) != -1) {
256 offset++;
257 }
258 else {
259 // record the location of the first non lwsp and drop down to process the
260 // token characters.
261 endWhiteSpace = offset;
262 break;
263 }
264 }
265 }
266 else {
267 // we're at the start of a word token. We potentially need to break this up into subtokens
268 int wordStart = offset;
269
270 while (offset < endOffset) {
271 // step over the white space characters.
272 ch = text.charAt(offset);
273 if (linearWhiteSpace.indexOf(ch) == -1) {
274 offset++;
275 }
276 else {
277 break;
278 }
279
280 //NB: Trailing whitespace on these header strings will just be discarded.
281 }
282 // pull out the word token.
283 String word = text.substring(wordStart, offset);
284
285 int decodeStart = 0;
286
287 // now scan and process each of the bits within here.
288 while (decodeStart < word.length()) {
289 int tokenStart = word.indexOf("=?", decodeStart);
290 if (tokenStart == -1) {
291 // this is a normal token, so it doesn't matter what the previous token was. Add the white space
292 // if we have it.
293 if (startWhiteSpace != -1) {
294 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
295 startWhiteSpace = -1;
296 }
297 // this is not a decoded token.
298 previousTokenEncoded = false;
299 decodedText.append(word.substring(decodeStart));
300 // we're finished.
301 break;
302 }
303 // we have something to process
304 else {
305 // we might have a normal token preceeding this.
306 if (tokenStart != decodeStart) {
307 // this is a normal token, so it doesn't matter what the previous token was. Add the white space
308 // if we have it.
309 if (startWhiteSpace != -1) {
310 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
311 startWhiteSpace = -1;
312 }
313 // this is not a decoded token.
314 previousTokenEncoded = false;
315 decodedText.append(word.substring(decodeStart, tokenStart));
316 }
317
318 // now find the end marker.
319 int tokenEnd = word.indexOf("?=", tokenStart);
320 // sigh, an invalid token. Treat this as plain text.
321 if (tokenEnd == -1) {
322 // this is a normal token, so it doesn't matter what the previous token was. Add the white space
323 // if we have it.
324 if (startWhiteSpace != -1) {
325 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
326 startWhiteSpace = -1;
327 }
328 // this is not a decoded token.
329 previousTokenEncoded = false;
330 decodedText.append(word.substring(tokenStart));
331 // we're finished.
332 break;
333 }
334 else {
335 // update our ticker
336 decodeStart = tokenEnd + 2;
337
338 String token = word.substring(tokenStart, tokenEnd);
339 try {
340 // if this gives a parsing failure, treat it like a non-encoded word.
341 String decodedWord = decodeWord(token);
342
343 // are any whitespace characters significant? Append 'em if we've got 'em.
344 if (!previousTokenEncoded) {
345 if (startWhiteSpace != -1) {
346 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
347 startWhiteSpace = -1;
348 }
349 }
350 // this is definitely a decoded token.
351 previousTokenEncoded = true;
352 // and add this to the text.
353 decodedText.append(decodedWord);
354 // we continue parsing from here...we allow parsing errors to fall through
355 // and get handled as normal text.
356 continue;
357
358 } catch (ParseException e) {
359 }
360 // this is a normal token, so it doesn't matter what the previous token was. Add the white space
361 // if we have it.
362 if (startWhiteSpace != -1) {
363 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
364 startWhiteSpace = -1;
365 }
366 // this is not a decoded token.
367 previousTokenEncoded = false;
368 decodedText.append(token);
369 }
370 }
371 }
372 }
373 }
374
375 return decodedText.toString();
376 }
377
378 /**
379 * Parse a string using the RFC 2047 rules for an "encoded-word"
380 * type. This encoding has the syntax:
381 *
382 * encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
383 *
384 * @param word The possibly encoded word value.
385 *
386 * @return The decoded word.
387 * @exception ParseException
388 * @exception UnsupportedEncodingException
389 */
390 public static String decodeWord(String word) throws ParseException, UnsupportedEncodingException {
391 // encoded words start with the characters "=?". If this not an encoded word, we throw a
392 // ParseException for the caller.
393
394 if (!word.startsWith("=?")) {
395 throw new ParseException("Invalid RFC 2047 encoded-word: " + word);
396 }
397
398 int charsetPos = word.indexOf('?', 2);
399 if (charsetPos == -1) {
400 throw new ParseException("Missing charset in RFC 2047 encoded-word: " + word);
401 }
402
403 // pull out the character set information (this is the MIME name at this point).
404 String charset = word.substring(2, charsetPos).toLowerCase();
405
406 // now pull out the encoding token the same way.
407 int encodingPos = word.indexOf('?', charsetPos + 1);
408 if (encodingPos == -1) {
409 throw new ParseException("Missing encoding in RFC 2047 encoded-word: " + word);
410 }
411
412 String encoding = word.substring(charsetPos + 1, encodingPos);
413
414 // and finally the encoded text.
415 int encodedTextPos = word.indexOf("?=", encodingPos + 1);
416 if (encodedTextPos == -1) {
417 throw new ParseException("Missing encoded text in RFC 2047 encoded-word: " + word);
418 }
419
420 String encodedText = word.substring(encodingPos + 1, encodedTextPos);
421
422 // seems a bit silly to encode a null string, but easy to deal with.
423 if (encodedText.length() == 0) {
424 return "";
425 }
426
427 try {
428 // the decoder writes directly to an output stream.
429 ByteArrayOutputStream out = new ByteArrayOutputStream(encodedText.length());
430
431 byte[] encodedData = encodedText.getBytes("US-ASCII");
432
433 // Base64 encoded?
434 if (encoding.equals("B")) {
435 Base64.decode(encodedData, out);
436 }
437 // maybe quoted printable.
438 else if (encoding.equals("Q")) {
439 QuotedPrintableEncoder dataEncoder = new QuotedPrintableEncoder();
440 dataEncoder.decodeWord(encodedData, out);
441 }
442 else {
443 throw new UnsupportedEncodingException("Unknown RFC 2047 encoding: " + encoding);
444 }
445 // get the decoded byte data and convert into a string.
446 byte[] decodedData = out.toByteArray();
447 return new String(decodedData, javaCharset(charset));
448 } catch (IOException e) {
449 throw new UnsupportedEncodingException("Invalid RFC 2047 encoding");
450 }
451
452 }
453
454 /**
455 * Wrap an encoder around a given output stream.
456 *
457 * @param out The output stream to wrap.
458 * @param encoding The name of the encoding.
459 *
460 * @return A instance of FilterOutputStream that manages on the fly
461 * encoding for the requested encoding type.
462 * @exception MessagingException
463 */
464 public static OutputStream encode(OutputStream out, String encoding) throws MessagingException {
465 // no encoding specified, so assume it goes out unchanged.
466 if (encoding == null) {
467 return out;
468 }
469
470 encoding = encoding.toLowerCase();
471
472 // some encodies are just pass-throughs, with no real decoding.
473 if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) {
474 return out;
475 }
476 else if (encoding.equals("base64")) {
477 return new Base64EncoderStream(out);
478 }
479 // UUEncode is known by a couple historical extension names too.
480 else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) {
481 return new UUEncoderStream(out);
482 }
483 else if (encoding.equals("quoted-printable")) {
484 return new QuotedPrintableEncoderStream(out);
485 }
486 else {
487 throw new MessagingException("Unknown encoding " + encoding);
488 }
489 }
490
491 /**
492 * Wrap an encoder around a given output stream.
493 *
494 * @param out The output stream to wrap.
495 * @param encoding The name of the encoding.
496 * @param filename The filename of the data being sent (only used for UUEncode).
497 *
498 * @return A instance of FilterOutputStream that manages on the fly
499 * encoding for the requested encoding type.
500 * @exception MessagingException
501 */
502 public static OutputStream encode(OutputStream out, String encoding, String filename) throws MessagingException {
503 encoding = encoding.toLowerCase();
504
505 // some encodies are just pass-throughs, with no real decoding.
506 if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) {
507 return out;
508 }
509 else if (encoding.equals("base64")) {
510 return new Base64EncoderStream(out);
511 }
512 // UUEncode is known by a couple historical extension names too.
513 else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) {
514 return new UUEncoderStream(out, filename);
515 }
516 else if (encoding.equals("quoted-printable")) {
517 return new QuotedPrintableEncoderStream(out);
518 }
519 else {
520 throw new MessagingException("Unknown encoding " + encoding);
521 }
522 }
523
524
525 public static String encodeText(String word) throws UnsupportedEncodingException {
526 return encodeText(word, null, null);
527 }
528
529 public static String encodeText(String word, String charset, String encoding) throws UnsupportedEncodingException {
530 return encodeWord(word, charset, encoding, false);
531 }
532
533 public static String encodeWord(String word) throws UnsupportedEncodingException {
534 return encodeWord(word, null, null);
535 }
536
537 public static String encodeWord(String word, String charset, String encoding) throws UnsupportedEncodingException {
538 return encodeWord(word, charset, encoding, true);
539 }
540
541
542 private static String encodeWord(String word, String charset, String encoding, boolean encodingWord) throws UnsupportedEncodingException {
543
544 // figure out what we need to encode this.
545 String encoder = ASCIIUtil.getTextTransferEncoding(word);
546 // all ascii? We can return this directly,
547 if (encoder.equals("7bit")) {
548 return word;
549 }
550
551 // if not given a charset, use the default.
552 if (charset == null) {
553 charset = getDefaultMIMECharset();
554 }
555
556 // sort out the encoder. If not explicitly given, use the best guess we've already established.
557 if (encoding != null) {
558 if (encoding.equalsIgnoreCase("B")) {
559 encoder = "base64";
560 }
561 else if (encoding.equalsIgnoreCase("Q")) {
562 encoder = "quoted-printable";
563 }
564 else {
565 throw new UnsupportedEncodingException("Unknown transfer encoding: " + encoding);
566 }
567 }
568
569 try {
570 // get the string bytes in the correct source charset
571 InputStream in = new ByteArrayInputStream(word.getBytes( javaCharset(charset)));
572 ByteArrayOutputStream out = new ByteArrayOutputStream();
573
574 if (encoder.equals("base64")) {
575 Base64Encoder dataEncoder = new Base64Encoder();
576 dataEncoder.encodeWord(in, charset, out, SessionUtil.getBooleanProperty(MIME_FOLDENCODEDWORDS, false));
577 }
578 else {
579 QuotedPrintableEncoder dataEncoder = new QuotedPrintableEncoder();
580 dataEncoder.encodeWord(in, charset, encodingWord ? QP_WORD_SPECIALS : QP_TEXT_SPECIALS, out, SessionUtil.getBooleanProperty(MIME_FOLDENCODEDWORDS, false));
581 }
582
583 byte[] bytes = out.toByteArray();
584 return new String(bytes);
585 } catch (IOException e) {
586 throw new UnsupportedEncodingException("Invalid encoding");
587 }
588 }
589
590
591 /**
592 * Examine the content of a data source and decide what type
593 * of transfer encoding should be used. For text streams,
594 * we'll decided between 7bit, quoted-printable, and base64.
595 * For binary content types, we'll use either 7bit or base64.
596 *
597 * @param handler The DataHandler associated with the content.
598 *
599 * @return The string name of an encoding used to transfer the content.
600 */
601 public static String getEncoding(DataHandler handler) {
602
603
604 // if this handler has an associated data source, we can read directly from the
605 // data source to make this judgment. This is generally MUCH faster than asking the
606 // DataHandler to write out the data for us.
607 DataSource ds = handler.getDataSource();
608 if (ds != null) {
609 return getEncoding(ds);
610 }
611
612 try {
613 // get a parser that allows us to make comparisons.
614 ContentType content = new ContentType(ds.getContentType());
615
616 // The only access to the content bytes at this point is by asking the handler to write
617 // the information out to a stream. We're going to pipe this through a special stream
618 // that examines the bytes as they go by.
619 ContentCheckingOutputStream checker = new ContentCheckingOutputStream();
620
621 handler.writeTo(checker);
622
623 // figure this out based on whether we believe this to be a text type or not.
624 if (content.match("text/*")) {
625 return checker.getTextTransferEncoding();
626 }
627 else {
628 return checker.getBinaryTransferEncoding();
629 }
630
631 } catch (Exception e) {
632 // any unexpected I/O exceptions we'll force to a "safe" fallback position.
633 return "base64";
634 }
635 }
636
637
638 /**
639 * Determine the what transfer encoding should be used for
640 * data retrieved from a DataSource.
641 *
642 * @param source The DataSource for the transmitted data.
643 *
644 * @return The string name of the encoding form that should be used for
645 * the data.
646 */
647 public static String getEncoding(DataSource source) {
648 InputStream in = null;
649
650 try {
651 // get a parser that allows us to make comparisons.
652 ContentType content = new ContentType(source.getContentType());
653
654 // we're probably going to have to scan the data.
655 in = source.getInputStream();
656
657 if (!content.match("text/*")) {
658 // Not purporting to be a text type? Examine the content to see we might be able to
659 // at least pretend it is an ascii type.
660 return ASCIIUtil.getBinaryTransferEncoding(in);
661 }
662 else {
663 return ASCIIUtil.getTextTransferEncoding(in);
664 }
665 } catch (Exception e) {
666 // this was a problem...not sure what makes sense here, so we'll assume it's binary
667 // and we need to transfer this using Base64 encoding.
668 return "base64";
669 } finally {
670 // make sure we close the stream
671 try {
672 if (in != null) {
673 in.close();
674 }
675 } catch (IOException e) {
676 }
677 }
678 }
679
680
681 /**
682 * Quote a "word" value. If the word contains any character from
683 * the specified "specials" list, this value is returned as a
684 * quoted strong. Otherwise, it is returned unchanged (an "atom").
685 *
686 * @param word The word requiring quoting.
687 * @param specials The set of special characters that can't appear in an unquoted
688 * string.
689 *
690 * @return The quoted value. This will be unchanged if the word doesn't contain
691 * any of the designated special characters.
692 */
693 public static String quote(String word, String specials) {
694 int wordLength = word.length();
695 boolean requiresQuoting = false;
696 // scan the string looking for problem characters
697 for (int i =0; i < wordLength; i++) {
698 char ch = word.charAt(i);
699 // special escaped characters require escaping, which also implies quoting.
700 if (escapedChars.indexOf(ch) >= 0) {
701 return quoteAndEscapeString(word);
702 }
703 // now check for control characters or the designated special characters.
704 if (ch < 32 || ch >= 127 || specials.indexOf(ch) >= 0) {
705 // we know this requires quoting, but we still need to scan the entire string to
706 // see if contains chars that require escaping. Just go ahead and treat it as if it does.
707 return quoteAndEscapeString(word);
708 }
709 }
710 return word;
711 }
712
713 /**
714 * Take a string and return it as a formatted quoted string, with
715 * all characters requiring escaping handled properly.
716 *
717 * @param word The string to quote.
718 *
719 * @return The quoted string.
720 */
721 private static String quoteAndEscapeString(String word) {
722 int wordLength = word.length();
723 // allocate at least enough for the string and two quotes plus a reasonable number of escaped chars.
724 StringBuffer buffer = new StringBuffer(wordLength + 10);
725 // add the leading quote.
726 buffer.append('"');
727
728 for (int i = 0; i < wordLength; i++) {
729 char ch = word.charAt(i);
730 // is this an escaped char?
731 if (escapedChars.indexOf(ch) >= 0) {
732 // add the escape marker before appending.
733 buffer.append('\\');
734 }
735 buffer.append(ch);
736 }
737 // now the closing quote
738 buffer.append('"');
739 return buffer.toString();
740 }
741
742 /**
743 * Translate a MIME standard character set name into the Java
744 * equivalent.
745 *
746 * @param charset The MIME standard name.
747 *
748 * @return The Java equivalent for this name.
749 */
750 public static String javaCharset(String charset) {
751 // nothing in, nothing out.
752 if (charset == null) {
753 return null;
754 }
755
756 String mappedCharset = (String)mime2java.get(charset.toLowerCase());
757 // if there is no mapping, then the original name is used. Many of the MIME character set
758 // names map directly back into Java. The reverse isn't necessarily true.
759 return mappedCharset == null ? charset : mappedCharset;
760 }
761
762 /**
763 * Map a Java character set name into the MIME equivalent.
764 *
765 * @param charset The java character set name.
766 *
767 * @return The MIME standard equivalent for this character set name.
768 */
769 public static String mimeCharset(String charset) {
770 // nothing in, nothing out.
771 if (charset == null) {
772 return null;
773 }
774
775 String mappedCharset = (String)java2mime.get(charset.toLowerCase());
776 // if there is no mapping, then the original name is used. Many of the MIME character set
777 // names map directly back into Java. The reverse isn't necessarily true.
778 return mappedCharset == null ? charset : mappedCharset;
779 }
780
781
782 /**
783 * Get the default character set to use, in Java name format.
784 * This either be the value set with the mail.mime.charset
785 * system property or obtained from the file.encoding system
786 * property. If neither of these is set, we fall back to
787 * 8859_1 (basically US-ASCII).
788 *
789 * @return The character string value of the default character set.
790 */
791 public static String getDefaultJavaCharset() {
792 String charset = SessionUtil.getProperty("mail.mime.charset");
793 if (charset != null) {
794 return javaCharset(charset);
795 }
796 return SessionUtil.getProperty("file.encoding", "8859_1");
797 }
798
799 /**
800 * Get the default character set to use, in MIME name format.
801 * This either be the value set with the mail.mime.charset
802 * system property or obtained from the file.encoding system
803 * property. If neither of these is set, we fall back to
804 * 8859_1 (basically US-ASCII).
805 *
806 * @return The character string value of the default character set.
807 */
808 static String getDefaultMIMECharset() {
809 // if the property is specified, this can be used directly.
810 String charset = SessionUtil.getProperty("mail.mime.charset");
811 if (charset != null) {
812 return charset;
813 }
814
815 // get the Java-defined default and map back to a MIME name.
816 return mimeCharset(SessionUtil.getProperty("file.encoding", "8859_1"));
817 }
818
819
820 /**
821 * Load the default mapping tables used by the javaCharset()
822 * and mimeCharset() methods. By default, these tables are
823 * loaded from the /META-INF/javamail.charset.map file. If
824 * something goes wrong loading that file, we configure things
825 * with a default mapping table (which just happens to mimic
826 * what's in the default mapping file).
827 */
828 static private void loadCharacterSetMappings() {
829 java2mime = new HashMap();
830 mime2java = new HashMap();
831
832
833 // normally, these come from a character map file contained in the jar file.
834 try {
835 InputStream map = javax.mail.internet.MimeUtility.class.getResourceAsStream("/META-INF/javamail.charset.map");
836
837 if (map != null) {
838 // get a reader for this so we can load.
839 BufferedReader reader = new BufferedReader(new InputStreamReader(map));
840
841 readMappings(reader, java2mime);
842 readMappings(reader, mime2java);
843 }
844 } catch (Exception e) {
845 }
846
847 // if any sort of error occurred reading the preferred file version, we could end up with empty
848 // mapping tables. This could cause all sorts of difficulty, so ensure they are populated with at
849 // least a reasonable set of defaults.
850
851 // these mappings echo what's in the default file.
852 if (java2mime.isEmpty()) {
853 java2mime.put("8859_1", "ISO-8859-1");
854 java2mime.put("iso8859_1", "ISO-8859-1");
855 java2mime.put("iso8859-1", "ISO-8859-1");
856
857 java2mime.put("8859_2", "ISO-8859-2");
858 java2mime.put("iso8859_2", "ISO-8859-2");
859 java2mime.put("iso8859-2", "ISO-8859-2");
860
861 java2mime.put("8859_3", "ISO-8859-3");
862 java2mime.put("iso8859_3", "ISO-8859-3");
863 java2mime.put("iso8859-3", "ISO-8859-3");
864
865 java2mime.put("8859_4", "ISO-8859-4");
866 java2mime.put("iso8859_4", "ISO-8859-4");
867 java2mime.put("iso8859-4", "ISO-8859-4");
868
869 java2mime.put("8859_5", "ISO-8859-5");
870 java2mime.put("iso8859_5", "ISO-8859-5");
871 java2mime.put("iso8859-5", "ISO-8859-5");
872
873 java2mime.put ("8859_6", "ISO-8859-6");
874 java2mime.put("iso8859_6", "ISO-8859-6");
875 java2mime.put("iso8859-6", "ISO-8859-6");
876
877 java2mime.put("8859_7", "ISO-8859-7");
878 java2mime.put("iso8859_7", "ISO-8859-7");
879 java2mime.put("iso8859-7", "ISO-8859-7");
880
881 java2mime.put("8859_8", "ISO-8859-8");
882 java2mime.put("iso8859_8", "ISO-8859-8");
883 java2mime.put("iso8859-8", "ISO-8859-8");
884
885 java2mime.put("8859_9", "ISO-8859-9");
886 java2mime.put("iso8859_9", "ISO-8859-9");
887 java2mime.put("iso8859-9", "ISO-8859-9");
888
889 java2mime.put("sjis", "Shift_JIS");
890 java2mime.put ("jis", "ISO-2022-JP");
891 java2mime.put("iso2022jp", "ISO-2022-JP");
892 java2mime.put("euc_jp", "euc-jp");
893 java2mime.put("koi8_r", "koi8-r");
894 java2mime.put("euc_cn", "euc-cn");
895 java2mime.put("euc_tw", "euc-tw");
896 java2mime.put("euc_kr", "euc-kr");
897 }
898
899 if (mime2java.isEmpty ()) {
900 mime2java.put("iso-2022-cn", "ISO2022CN");
901 mime2java.put("iso-2022-kr", "ISO2022KR");
902 mime2java.put("utf-8", "UTF8");
903 mime2java.put("utf8", "UTF8");
904 mime2java.put("ja_jp.iso2022-7", "ISO2022JP");
905 mime2java.put("ja_jp.eucjp", "EUCJIS");
906 mime2java.put ("euc-kr", "KSC5601");
907 mime2java.put("euckr", "KSC5601");
908 mime2java.put("us-ascii", "ISO-8859-1");
909 mime2java.put("x-us-ascii", "ISO-8859-1");
910 }
911 }
912
913
914 /**
915 * Read a section of a character map table and populate the
916 * target mapping table with the information. The table end
917 * is marked by a line starting with "--" and also ending with
918 * "--". Blank lines and comment lines (beginning with '#') are
919 * ignored.
920 *
921 * @param reader The source of the file information.
922 * @param table The mapping table used to store the information.
923 */
924 static private void readMappings(BufferedReader reader, Map table) throws IOException {
925 // process lines to the EOF or the end of table marker.
926 while (true) {
927 String line = reader.readLine();
928 // no line returned is an EOF
929 if (line == null) {
930 return;
931 }
932
933 // trim so we're not messed up by trailing blanks
934 line = line.trim();
935
936 if (line.length() == 0 || line.startsWith("#")) {
937 continue;
938 }
939
940 // stop processing if this is the end-of-table marker.
941 if (line.startsWith("--") && line.endsWith("--")) {
942 return;
943 }
944
945 // we allow either blanks or tabs as token delimiters.
946 StringTokenizer tokenizer = new StringTokenizer(line, " \t");
947
948 try {
949 String from = tokenizer.nextToken().toLowerCase();
950 String to = tokenizer.nextToken();
951
952 table.put(from, to);
953 } catch (NoSuchElementException e) {
954 // just ignore the line if invalid.
955 }
956 }
957 }
958
959
960 /**
961 * Perform RFC 2047 text folding on a string of text.
962 *
963 * @param used The amount of text already "used up" on this line. This is
964 * typically the length of a message header that this text
965 * get getting added to.
966 * @param s The text to fold.
967 *
968 * @return The input text, with linebreaks inserted at appropriate fold points.
969 */
970 public static String fold(int used, String s) {
971 // if folding is disable, unfolding is also. Return the string unchanged.
972 if (!SessionUtil.getBooleanProperty(MIME_FOLDTEXT, true)) {
973 return s;
974 }
975
976 int end;
977
978 // now we need to strip off any trailing "whitespace", where whitespace is blanks, tabs,
979 // and line break characters.
980 for (end = s.length() - 1; end >= 0; end--) {
981 int ch = s.charAt(end);
982 if (ch != ' ' && ch != '\t' ) {
983 break;
984 }
985 }
986
987 // did we actually find something to remove? Shorten the String to the trimmed length
988 if (end != s.length() - 1) {
989 s = s.substring(0, end + 1);
990 }
991
992 // does the string as it exists now not require folding? We can just had that back right off.
993 if (s.length() + used <= FOLD_THRESHOLD) {
994 return s;
995 }
996
997 // get a buffer for the length of the string, plus room for a few line breaks.
998 // these are soft line breaks, so we generally need more that just the line breaks (an escape +
999 // CR + LF + leading space on next line);
1000 StringBuffer newString = new StringBuffer(s.length() + 8);
1001
1002
1003 // now keep chopping this down until we've accomplished what we need.
1004 while (used + s.length() > FOLD_THRESHOLD) {
1005 int breakPoint = -1;
1006 char breakChar = 0;
1007
1008 // now scan for the next place where we can break.
1009 for (int i = 0; i < s.length(); i++) {
1010 // have we passed the fold limit?
1011 if (used + i > FOLD_THRESHOLD) {
1012 // if we've already seen a blank, then stop now. Otherwise
1013 // we keep going until we hit a fold point.
1014 if (breakPoint != -1) {
1015 break;
1016 }
1017 }
1018 char ch = s.charAt(i);
1019
1020 // a white space character?
1021 if (ch == ' ' || ch == '\t') {
1022 // this might be a run of white space, so skip over those now.
1023 breakPoint = i;
1024 // we need to maintain the same character type after the inserted linebreak.
1025 breakChar = ch;
1026 i++;
1027 while (i < s.length()) {
1028 ch = s.charAt(i);
1029 if (ch != ' ' && ch != '\t') {
1030 break;
1031 }
1032 i++;
1033 }
1034 }
1035 // found an embedded new line. Escape this so that the unfolding process preserves it.
1036 else if (ch == '\n') {
1037 newString.append('\\');
1038 newString.append('\n');
1039 }
1040 else if (ch == '\r') {
1041 newString.append('\\');
1042 newString.append('\n');
1043 i++;
1044 // if this is a CRLF pair, add the second char also
1045 if (i < s.length() && s.charAt(i) == '\n') {
1046 newString.append('\r');
1047 }
1048 }
1049
1050 }
1051 // no fold point found, we punt, append the remainder and leave.
1052 if (breakPoint == -1) {
1053 newString.append(s);
1054 return newString.toString();
1055 }
1056 newString.append(s.substring(0, breakPoint));
1057 newString.append("\r\n");
1058 newString.append(breakChar);
1059 // chop the string
1060 s = s.substring(breakPoint + 1);
1061 // start again, and we've used the first char of the limit already with the whitespace char.
1062 used = 1;
1063 }
1064
1065 // add on the remainder, and return
1066 newString.append(s);
1067 return newString.toString();
1068 }
1069
1070 /**
1071 * Unfold a folded string. The unfolding process will remove
1072 * any line breaks that are not escaped and which are also followed
1073 * by whitespace characters.
1074 *
1075 * @param s The folded string.
1076 *
1077 * @return A new string with unfolding rules applied.
1078 */
1079 public static String unfold(String s) {
1080 // if folding is disable, unfolding is also. Return the string unchanged.
1081 if (!SessionUtil.getBooleanProperty(MIME_FOLDTEXT, true)) {
1082 return s;
1083 }
1084
1085 // if there are no line break characters in the string, we can just return this.
1086 if (s.indexOf('\n') < 0 && s.indexOf('\r') < 0) {
1087 return s;
1088 }
1089
1090 // we need to scan and fix things up.
1091 int length = s.length();
1092
1093 StringBuffer newString = new StringBuffer(length);
1094
1095 // scan the entire string
1096 for (int i = 0; i < length; i++) {
1097 char ch = s.charAt(i);
1098
1099 // we have a backslash. In folded strings, escape characters are only processed as such if
1100 // they preceed line breaks. Otherwise, we leave it be.
1101 if (ch == '\\') {
1102 // escape at the very end? Just add the character.
1103 if (i == length - 1) {
1104 newString.append(ch);
1105 }
1106 else {
1107 int nextChar = s.charAt(i + 1);
1108
1109 // naked newline? Add the new line to the buffer, and skip the escape char.
1110 if (nextChar == '\n') {
1111 newString.append('\n');
1112 i++;
1113 }
1114 else if (nextChar == '\r') {
1115 // just the CR left? Add it, removing the escape.
1116 if (i == length - 2 || s.charAt(i + 2) != '\r') {
1117 newString.append('\r');
1118 i++;
1119 }
1120 else {
1121 // toss the escape, add both parts of the CRLF, and skip over two chars.
1122 newString.append('\r');
1123 newString.append('\n');
1124 i += 2;
1125 }
1126 }
1127 else {
1128 // an escape for another purpose, just copy it over.
1129 newString.append(ch);
1130 }
1131 }
1132 }
1133 // we have an unescaped line break
1134 else if (ch == '\n' || ch == '\r') {
1135 // remember the position in case we need to backtrack.
1136 int lineBreak = i;
1137 boolean CRLF = false;
1138
1139 if (ch == '\r') {
1140 // check to see if we need to step over this.
1141 if (i < length - 1 && s.charAt(i + 1) == '\n') {
1142 i++;
1143 // flag the type so we know what we might need to preserve.
1144 CRLF = true;
1145 }
1146 }
1147
1148 // get a temp position scanner.
1149 int scan = i + 1;
1150
1151 // does a blank follow this new line? we need to scrap the new line and reduce the leading blanks
1152 // down to a single blank.
1153 if (scan < length && s.charAt(scan) == ' ') {
1154 // add the character
1155 newString.append(' ');
1156
1157 // scan over the rest of the blanks
1158 i = scan + 1;
1159 while (i < length && s.charAt(i) == ' ') {
1160 i++;
1161 }
1162 // we'll increment down below, so back up to the last blank as the current char.
1163 i--;
1164 }
1165 else {
1166 // we must keep this line break. Append the appropriate style.
1167 if (CRLF) {
1168 newString.append("\r\n");
1169 }
1170 else {
1171 newString.append(ch);
1172 }
1173 }
1174 }
1175 else {
1176 // just a normal, ordinary character
1177 newString.append(ch);
1178 }
1179 }
1180 return newString.toString();
1181 }
1182 }
1183
1184
1185 /**
1186 * Utility class for examining content information written out
1187 * by a DataHandler object. This stream gathers statistics on
1188 * the stream so it can make transfer encoding determinations.
1189 */
1190 class ContentCheckingOutputStream extends OutputStream {
1191 private int asciiChars = 0;
1192 private int nonAsciiChars = 0;
1193 private boolean containsLongLines = false;
1194 private boolean containsMalformedEOL = false;
1195 private int previousChar = 0;
1196 private int span = 0;
1197
1198 ContentCheckingOutputStream() {
1199 }
1200
1201 public void write(byte[] data) throws IOException {
1202 write(data, 0, data.length);
1203 }
1204
1205 public void write(byte[] data, int offset, int length) throws IOException {
1206 for (int i = 0; i < length; i++) {
1207 write(data[offset + i]);
1208 }
1209 }
1210
1211 public void write(int ch) {
1212 // we found a linebreak. Reset the line length counters on either one. We don't
1213 // really need to validate here.
1214 if (ch == '\n' || ch == '\r') {
1215 // we found a newline, this is only valid if the previous char was the '\r'
1216 if (ch == '\n') {
1217 // malformed linebreak? force this to base64 encoding.
1218 if (previousChar != '\r') {
1219 containsMalformedEOL = true;
1220 }
1221 }
1222 // hit a line end, reset our line length counter
1223 span = 0;
1224 }
1225 else {
1226 span++;
1227 // the text has long lines, we can't transfer this as unencoded text.
1228 if (span > 998) {
1229 containsLongLines = true;
1230 }
1231
1232 // non-ascii character, we have to transfer this in binary.
1233 if (!ASCIIUtil.isAscii(ch)) {
1234 nonAsciiChars++;
1235 }
1236 else {
1237 asciiChars++;
1238 }
1239 }
1240 previousChar = ch;
1241 }
1242
1243
1244 public String getBinaryTransferEncoding() {
1245 if (nonAsciiChars != 0 || containsLongLines || containsMalformedEOL) {
1246 return "base64";
1247 }
1248 else {
1249 return "7bit";
1250 }
1251 }
1252
1253 public String getTextTransferEncoding() {
1254 // looking good so far, only valid chars here.
1255 if (nonAsciiChars == 0) {
1256 // does this contain long text lines? We need to use a Q-P encoding which will
1257 // be only slightly longer, but handles folding the longer lines.
1258 if (containsLongLines) {
1259 return "quoted-printable";
1260 }
1261 else {
1262 // ideal! Easiest one to handle.
1263 return "7bit";
1264 }
1265 }
1266 else {
1267 // mostly characters requiring encoding? Base64 is our best bet.
1268 if (nonAsciiChars > asciiChars) {
1269 return "base64";
1270 }
1271 else {
1272 // Q-P encoding will use fewer bytes than the full Base64.
1273 return "quoted-printable";
1274 }
1275 }
1276 }
1277 }