summaryrefslogtreecommitdiffstats
path: root/java/com/android/voicemail/impl/mail/internet/MimeUtility.java
blob: bd85e478c7e4f822b60320baa02f7deeb5584415 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.android.voicemail.impl.mail.internet;

import android.text.TextUtils;
import android.util.Base64;
import android.util.Base64DataException;
import android.util.Base64InputStream;
import com.android.voicemail.impl.VvmLog;
import com.android.voicemail.impl.mail.Body;
import com.android.voicemail.impl.mail.BodyPart;
import com.android.voicemail.impl.mail.Message;
import com.android.voicemail.impl.mail.MessagingException;
import com.android.voicemail.impl.mail.Multipart;
import com.android.voicemail.impl.mail.Part;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.IOUtils;
import org.apache.james.mime4j.codec.DecodeMonitor;
import org.apache.james.mime4j.codec.DecoderUtil;
import org.apache.james.mime4j.codec.EncoderUtil;
import org.apache.james.mime4j.codec.QuotedPrintableInputStream;
import org.apache.james.mime4j.util.CharsetUtil;

public class MimeUtility {
  // Tag passed to VvmLog when this class reports an error.
  private static final String LOG_TAG = "Email";

  /** MIME type string identifying an embedded RFC 822 message. */
  public static final String MIME_TYPE_RFC822 = "message/rfc822";
  // Matches one carriage return or one line feed; unfold() uses it to strip line breaks.
  private static final Pattern PATTERN_CR_OR_LF = Pattern.compile("\r|\n");

  /**
   * Removes every carriage return and line feed from the given string. For a folded header
   * (CRLF followed by whitespace) this leaves just the whitespace, joining the value back onto a
   * single line.
   *
   * @param s the possibly folded string; may be null
   * @return null when {@code s} is null, {@code s} itself when it contains no CR or LF, otherwise
   *     a copy of {@code s} without any CR or LF characters
   */
  public static String unfold(String s) {
    if (s == null) {
      return null;
    }
    final Matcher lineBreaks = PATTERN_CR_OR_LF.matcher(s);
    // Nothing to strip: hand back the very same instance.
    if (!lineBreaks.find()) {
      return s;
    }
    // replaceAll() resets the matcher itself before it starts scanning.
    return lineBreaks.replaceAll("");
  }

  /**
   * Decodes the encoded-words in the given string by delegating to mime4j's {@code
   * DecoderUtil.decodeEncodedWords} with the strict decode monitor.
   *
   * @param s the string to decode; may be null
   * @return the decoded string, or null when {@code s} is null
   */
  public static String decode(String s) {
    return (s == null) ? null : DecoderUtil.decodeEncodedWords(s, DecodeMonitor.STRICT);
  }

  /**
   * Convenience wrapper that first unfolds the given string and then decodes the result.
   *
   * @param s the raw header value; may be null
   * @return the unfolded and decoded value, or null when {@code s} is null
   */
  public static String unfoldAndDecode(String s) {
    final String singleLine = unfold(s);
    return decode(singleLine);
  }

  /**
   * Placeholder for folding and encoding a header value. Currently returns its argument unchanged.
   *
   * <p>TODO implement proper foldAndEncode
   *
   * <p>NOTE: When this really works, we *must* remove all calls to foldAndEncode2() to prevent
   * duplication of encoding.
   *
   * @param s the header value
   * @return {@code s}, untouched
   */
  public static String foldAndEncode(String s) {
    return s;
  }

  /**
   * INTERIM fold-and-encode used only for Subject: headers. Kept separate from foldAndEncode() so
   * that other headers are not put at risk while the real implementation is pending.
   *
   * <p>TODO: Copy this code to foldAndEncode(), get rid of this function, confirm all working OK.
   *
   * @param s original string to encode and fold
   * @param usedCharacters number of characters already used up by header name
   * @return the String ready to be transmitted
   */
  public static String foldAndEncode2(String s, int usedCharacters) {
    // See james.mime4j.codec.EncoderUtil: encodeIfNecessary(text, usage, numUsedInHeaderName).
    // Usage.TEXT_TOKEN looks like the right thing for subjects; WORD_ENTITY is the one to use
    // for address/names.
    return fold(
        EncoderUtil.encodeIfNecessary(s, EncoderUtil.Usage.TEXT_TOKEN, usedCharacters),
        usedCharacters);
  }

  /**
   * INTERIM: From newer version of org.apache.james (but we don't want to import the entire
   * MimeUtil class).
   *
   * <p>Breaks the given string into several lines of at most 76 characters each (the limit exists
   * because a line may carry encoded words; see <a
   * href='http://www.faqs.org/rfcs/rfc2047.html'>RFC 2047</a> section 2). Breaks are only ever
   * inserted in front of a space or tab, so a run of non-whitespace longer than 76 characters
   * yields an over-long line that is broken at the next whitespace character.
   *
   * @param s string to split.
   * @param usedCharacters number of characters already used up. Usually the number of characters
   *     for header field name plus colon and one space.
   * @return a multiple-line representation of the given string; {@code s} itself when it already
   *     fits on the first line.
   */
  public static String fold(String s, int usedCharacters) {
    final int lineLimit = 76;
    final int total = s.length();
    if (usedCharacters + total <= lineLimit) {
      return s;
    }

    final StringBuilder folded = new StringBuilder();
    // Index where the current output line began. It starts out negative so that the characters
    // already consumed by the header name count against the first line.
    int lineStart = -usedCharacters;
    int breakCandidate = indexOfWsp(s, 0);
    while (breakCandidate != total) {
      final int followingWsp = indexOfWsp(s, breakCandidate + 1);
      // If running on to the following whitespace would overflow the line, break right here.
      if (followingWsp - lineStart > lineLimit) {
        folded.append(s.substring(Math.max(0, lineStart), breakCandidate));
        folded.append("\r\n");
        lineStart = breakCandidate;
      }
      breakCandidate = followingWsp;
    }
    // No whitespace left: whatever remains forms the last line.
    folded.append(s.substring(Math.max(0, lineStart)));
    return folded.toString();
  }

  /**
   * INTERIM: From newer version of org.apache.james (but we don't want to import the entire
   * MimeUtil class).
   *
   * <p>Finds the first space or tab at or after {@code fromIndex}.
   *
   * @return the index of that character, or {@code s.length()} when there is none.
   */
  private static int indexOfWsp(String s, int fromIndex) {
    final int end = s.length();
    int pos = fromIndex;
    while (pos < end) {
      final char ch = s.charAt(pos);
      if (ch == ' ' || ch == '\t') {
        return pos;
      }
      pos++;
    }
    return end;
  }

  /**
   * Returns the value of a header field or of one of its parameters.
   *
   * <p>The header is unfolded and split on ';'. If {@code name} is null, the first segment (the
   * header's own value, e.g. {@code text/plain}) is returned trimmed. Otherwise the segments are
   * searched for a {@code name=value} pair whose name equals {@code name}, ignoring case and
   * surrounding whitespace, and its value is returned trimmed with one pair of enclosing double
   * quotes removed.
   *
   * <p>The parameter name must match exactly; a parameter whose name merely starts with {@code
   * name} is not a match.
   *
   * <p>TODO: quite inefficient with the inner trimming & splitting. TODO: Splitting on ';' does not
   * honour quoted strings, so a quoted value containing ';' is cut short. TODO: Need to decode
   * %-escaped strings, as in: filename="ab%22d". ('+' -> ' ' conversion too? check RFC)
   *
   * @param header the raw header value; may be null
   * @param name the parameter to look up, or null to get the header's own value
   * @return the header value (if name=null; empty when the header has no value segment), the found
   *     parameter, or null if the header is null, the parameter is absent, or it has no '='
   */
  public static String getHeaderParameter(String header, String name) {
    if (header == null) {
      return null;
    }
    String[] parts = unfold(header).split(";");
    if (name == null) {
      // String.split drops trailing empty strings, so a header such as ";" yields no segments.
      return parts.length == 0 ? "" : parts[0].trim();
    }
    for (String part : parts) {
      // A positive limit guarantees at least one element, even for an empty segment.
      String[] parameterParts = part.split("=", 2);
      if (!parameterParts[0].trim().equalsIgnoreCase(name)) {
        continue;
      }
      if (parameterParts.length < 2) {
        return null;
      }
      String parameter = parameterParts[1].trim();
      // Require two characters so that a lone '"' is not treated as an opening and closing quote.
      if (parameter.length() >= 2 && parameter.startsWith("\"") && parameter.endsWith("\"")) {
        return parameter.substring(1, parameter.length() - 1);
      }
      return parameter;
    }
    return null;
  }

  /**
   * Reads the Part's body and returns a String based on any charset conversion that needed to be
   * done.
   *
   * <p>Only parts whose MIME type matches {@code text/*} are read. The charset comes from the
   * {@code charset} parameter of the part's content type; if that parameter is missing, or names
   * a charset that cannot be resolved, the bytes are decoded as ASCII.
   *
   * @param part The part containing a body
   * @return a String containing the converted text in the body, or null if there was no text or an
   *     error during conversion.
   */
  public static String getTextFromPart(Part part) {
    try {
      if (part == null || part.getBody() == null) {
        return null;
      }
      String mimeType = part.getMimeType();
      if (mimeType == null || !MimeUtility.mimeTypeMatches(mimeType, "text/*")) {
        // Not text: do not even open the body stream, so there is nothing to leak.
        return null;
      }

      /*
       * Now we read the part into a buffer for further processing. Because
       * the stream is now wrapped we'll remove any transfer encoding at this point.
       */
      ByteArrayOutputStream out = new ByteArrayOutputStream();
      InputStream in = part.getBody().getInputStream();
      try {
        IOUtils.copy(in, out);
      } finally {
        // Close the stream even when the copy fails part-way through.
        if (in != null) {
          in.close();
        }
      }

      /*
       * We've got a text part, so let's see if it needs to be processed further.
       */
      String charset = getHeaderParameter(part.getContentType(), "charset");
      if (charset != null) {
        /*
         * See if there is conversion from the MIME charset to the Java one. A null result
         * (presumably what lookup() gives for a name Java does not know) falls through to
         * the default below instead of failing on a null dereference.
         */
        java.nio.charset.Charset javaCharset = CharsetUtil.lookup(charset);
        charset = (javaCharset != null) ? javaCharset.name() : null;
      }
      /*
       * No usable encoding, so use us-ascii, which is the standard.
       */
      if (charset == null) {
        charset = "ASCII";
      }
      /*
       * Convert and return as new String
       */
      return out.toString(charset);
    } catch (OutOfMemoryError oom) {
      /*
       * If we are not able to process the body there's nothing we can do about it. Return
       * null and let the upper layers handle the missing content.
       */
      VvmLog.e(LOG_TAG, "Unable to getTextFromPart " + oom.toString());
    } catch (Exception e) {
      /*
       * If we are not able to process the body there's nothing we can do about it. Return
       * null and let the upper layers handle the missing content.
       */
      VvmLog.e(LOG_TAG, "Unable to getTextFromPart " + e.toString());
    }
    return null;
  }

  /**
   * Returns true if the given mimeType matches the matchAgainst specification. The comparison
   * ignores case and the matchAgainst string may include "*" for a wildcard (e.g. "image/*").
   *
   * <p>Every character of matchAgainst other than "*" is matched literally, so types containing
   * regex metacharacters (for example "image/svg+xml" or "application/vnd.ms-excel") behave as
   * expected.
   *
   * @param mimeType A MIME type to check.
   * @param matchAgainst A MIME type to check against. May include wildcards.
   * @return true if the mimeType matches; false if it does not or if either argument is null
   */
  public static boolean mimeTypeMatches(String mimeType, String matchAgainst) {
    if (mimeType == null || matchAgainst == null) {
      return false;
    }
    // Build the regex piecewise: literal runs are quoted, each '*' becomes ".*".
    StringBuilder regex = new StringBuilder();
    int literalStart = 0;
    int star = matchAgainst.indexOf('*', literalStart);
    while (star >= 0) {
      regex.append(Pattern.quote(matchAgainst.substring(literalStart, star))).append(".*");
      literalStart = star + 1;
      star = matchAgainst.indexOf('*', literalStart);
    }
    regex.append(Pattern.quote(matchAgainst.substring(literalStart)));
    Pattern p = Pattern.compile(regex.toString(), Pattern.CASE_INSENSITIVE);
    return p.matcher(mimeType).matches();
  }

  /**
   * Checks a MIME type against several specifications at once. Each entry is tested with the
   * single-specification overload, so matching ignores case and an entry may use "*" as a
   * wildcard (e.g. "image/*").
   *
   * @param mimeType A MIME type to check.
   * @param matchAgainst An array of MIME types to check against. May include wildcards.
   * @return true as soon as one entry matches; false if none does
   */
  public static boolean mimeTypeMatches(String mimeType, String[] matchAgainst) {
    for (int i = 0; i < matchAgainst.length; i++) {
      final String candidate = matchAgainst[i];
      if (mimeTypeMatches(mimeType, candidate)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Wraps an input stream in the decoder that matches the given content transfer encoding. Only
   * "quoted-printable" and "base64" (compared ignoring case) get a decoder; for a null or any
   * other encoding the stream is handed back as is.
   *
   * @param in the input stream
   * @param contentTransferEncoding the content transfer encoding header value; may be null
   * @return a properly wrapped stream, or {@code in} itself when no decoding is required
   */
  public static InputStream getInputStreamForContentTransferEncoding(
      InputStream in, String contentTransferEncoding) {
    if (contentTransferEncoding == null) {
      return in;
    }
    // Reduce the header to its bare value, dropping any parameters after ';'.
    final String encoding = MimeUtility.getHeaderParameter(contentTransferEncoding, null);
    if ("quoted-printable".equalsIgnoreCase(encoding)) {
      return new QuotedPrintableInputStream(in);
    }
    if ("base64".equalsIgnoreCase(encoding)) {
      return new Base64InputStream(in, Base64.DEFAULT);
    }
    return in;
  }

  /**
   * Removes any content transfer encoding from the stream and returns a Body.
   *
   * <p>The decoded bytes are copied into a new {@link BinaryTempFileBody}. A {@code
   * Base64DataException} raised during the copy is deliberately not propagated: the copy simply
   * stops and the body is returned with whatever had been written up to that point.
   *
   * @param in the raw, still encoded stream; it is not closed by this method
   * @param contentTransferEncoding the content transfer encoding header value; may be null
   * @return the body holding the decoded content
   * @throws IOException if reading or writing fails for any other reason
   */
  public static Body decodeBody(InputStream in, String contentTransferEncoding) throws IOException {
    // Wrapping the stream is what strips the transfer encoding.
    final InputStream decoded =
        getInputStreamForContentTransferEncoding(in, contentTransferEncoding);
    final BinaryTempFileBody body = new BinaryTempFileBody();
    final OutputStream sink = body.getOutputStream();
    try {
      IOUtils.copy(decoded, sink);
    } catch (Base64DataException ignored) {
      // TODO Need to fix this somehow
      // Intentionally swallowed for now: a warning could be appended to the body here, e.g.
      //   String warning = "\n\n" + Email.getMessageDecodeErrorString();
      //   out.write(warning.getBytes());
    } finally {
      sink.close();
    }
    return body;
  }

  /**
   * Recursively scan a Part (usually a Message) and sort out which of its children will be
   * "viewable" and which will be attachments.
   *
   * <p>Multiparts and embedded messages are descended into. Inside a multipart/alternative that
   * offers a text/html part, the text/plain siblings are skipped. A leaf part is viewable when its
   * disposition is missing or "inline" and its MIME type starts with "text" or "image" (compared
   * ignoring case, independent of the default locale); every other leaf, including one without a
   * MIME type, is an attachment.
   *
   * @param part The part to be broken down
   * @param viewables This arraylist will be populated with all parts that appear to be the
   *     "message" (e.g. text/plain & text/html)
   * @param attachments This arraylist will be populated with all parts that appear to be
   *     attachments (including inlines)
   * @throws MessagingException
   */
  public static void collectParts(Part part, ArrayList<Part> viewables, ArrayList<Part> attachments)
      throws MessagingException {
    String disposition = part.getDisposition();
    String dispositionType = MimeUtility.getHeaderParameter(disposition, null);
    // If a disposition is not specified, default to "inline"
    boolean inline =
        TextUtils.isEmpty(dispositionType) || "inline".equalsIgnoreCase(dispositionType);
    // May be null; compared below without going through the locale-sensitive toLowerCase().
    String mimeType = part.getMimeType();

    if (part.getBody() instanceof Multipart) {
      // If the part is Multipart but not alternative it's either mixed or
      // something we don't know about, which means we treat it as mixed
      // per the spec. We just process its pieces recursively.
      // NOTE(review): this cast assumes every Multipart here is a MimeMultipart - confirm.
      MimeMultipart mp = (MimeMultipart) part.getBody();
      boolean foundHtml = false;
      // Constant-first comparison so a missing subtype is simply "not alternative".
      if ("alternative".equals(mp.getSubTypeForTest())) {
        for (int i = 0; i < mp.getCount(); i++) {
          if (mp.getBodyPart(i).isMimeType("text/html")) {
            foundHtml = true;
            break;
          }
        }
      }
      for (int i = 0; i < mp.getCount(); i++) {
        // See if we have text and html
        BodyPart bp = mp.getBodyPart(i);
        // If there's html, don't bother loading text
        if (foundHtml && bp.isMimeType("text/plain")) {
          continue;
        }
        collectParts(bp, viewables, attachments);
      }
    } else if (part.getBody() instanceof Message) {
      // If the part is an embedded message we just continue to process
      // it, pulling any viewables or attachments into the running list.
      Message message = (Message) part.getBody();
      collectParts(message, viewables, attachments);
    } else if (inline
        && mimeType != null
        && (mimeType.regionMatches(true, 0, "text", 0, 4)
            || mimeType.regionMatches(true, 0, "image", 0, 5))) {
      // We'll treat text and images as viewables
      viewables.add(part);
    } else {
      // Everything else is an attachment.
      attachments.add(part);
    }
  }
}