/*
* (C) Copyright 2006-2009 Nuxeo SA (http://nuxeo.com/) and others.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Contributors:
* Nuxeo - initial API and implementation
*
* $Id$
*/
package org.nuxeo.ecm.core.convert.plugins.text.extractors;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import javax.mail.Address;
import javax.mail.Message.RecipientType;
import javax.mail.MessagingException;
import javax.mail.Multipart;
import javax.mail.Part;
import javax.mail.Session;
import javax.mail.internet.ContentType;
import javax.mail.internet.MimeMessage;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.core.api.Blobs;
import org.nuxeo.ecm.core.api.blobholder.BlobHolder;
import org.nuxeo.ecm.core.api.blobholder.SimpleBlobHolder;
import org.nuxeo.ecm.core.convert.api.ConversionException;
import org.nuxeo.ecm.core.convert.api.ConversionService;
import org.nuxeo.ecm.core.convert.cache.SimpleCachableBlobHolder;
import org.nuxeo.ecm.core.convert.extension.Converter;
import org.nuxeo.ecm.core.convert.extension.ConverterDescriptor;
import org.nuxeo.runtime.api.Framework;
/**
 * Converter that extracts a plain-text rendition of an RFC 822 mail message.
 * <p>
 * The produced text is the space-separated concatenation of the subject, the sender and TO/CC recipient addresses,
 * and, for every leaf part of the message, its file name, its description and its textual content. Non
 * {@code text/plain} parts are turned into text through the {@link ConversionService} when a converter to
 * {@code text/plain} is registered for their mime type; otherwise their content is skipped.
 */
public class RFC822ToTextConverter implements Converter {

    private static final Log log = LogFactory.getLog(RFC822ToTextConverter.class);

    private static final String MESSAGE_RFC822_MIMETYPE = "message/rfc822";

    private static final String TXT_MT = "text/plain";

    /** Descriptor received in {@link #init(ConverterDescriptor)}; provides the destination mime type. */
    protected ConverterDescriptor descriptor;

    /**
     * Parses the given blob as a MIME message and extracts its text.
     * <p>
     * The text is first accumulated in a temporary file, which is deleted once the result blob has been created from
     * its content.
     *
     * @param blob the blob holding the raw message, may be {@code null}
     * @return a blob carrying the extracted text and the descriptor's destination mime type, or {@code null} if
     *         {@code blob} is {@code null} or if an {@link IOException} or {@link MessagingException} occurred (the
     *         error is logged)
     */
    protected Blob extractTextFromMessage(Blob blob) {
        if (blob == null) {
            return null;
        }
        File f = null;
        // the source stream stays open until every part has been processed, then is always closed
        try (InputStream messageStream = blob.getStream()) {
            MimeMessage msg = new MimeMessage((Session) null, messageStream);
            f = Framework.createTempFile("rfc822totext", ".txt");
            // close the output before reading the file back so that everything written is visible
            try (OutputStream fo = new FileOutputStream(f)) {
                List<Part> parts = getAttachmentParts(msg);
                writeInfo(fo, msg.getSubject());
                writeInfo(fo, msg.getFrom());
                writeInfo(fo, msg.getRecipients(RecipientType.TO));
                writeInfo(fo, msg.getRecipients(RecipientType.CC));
                for (Part part : parts) {
                    writeInfo(fo, part.getFileName());
                    writeInfo(fo, part.getDescription());
                    // a null extraction result is ignored by writeInfo
                    writeInfo(fo, extractTextFromMessagePart(part));
                }
            }
            Blob outblob;
            try (InputStream in = new FileInputStream(f)) {
                outblob = Blobs.createBlob(in);
            }
            outblob.setMimeType(descriptor.getDestinationMimeType());
            return outblob;
        } catch (IOException | MessagingException e) {
            // pass the exception as throwable too, so that the stack trace is logged
            log.error(e, e);
        } finally {
            if (f != null) {
                f.delete();
            }
        }
        return null;
    }

    /**
     * Writes the string form of an address followed by a single space.
     *
     * @param stream the stream to write to
     * @param address the address to write; nothing is written when {@code null}
     */
    protected static void writeInfo(OutputStream stream, Address address) {
        if (address != null) {
            writeInfo(stream, address.toString());
        }
    }

    /**
     * Writes each address of the array, each one followed by a single space.
     *
     * @param stream the stream to write to
     * @param addresses the addresses to write; nothing is written when {@code null}
     */
    protected static void writeInfo(OutputStream stream, Address[] addresses) {
        if (addresses != null) {
            for (Address address : addresses) {
                writeInfo(stream, address);
            }
        }
    }

    /**
     * Writes a string followed by a single space.
     *
     * @param stream the stream to write to
     * @param info the text to write; nothing is written when {@code null}
     */
    protected static void writeInfo(OutputStream stream, String info) {
        if (info != null) {
            // NOTE(review): getBytes() encodes with the platform default charset; kept as is because the
            // encoding of the converter output mixed into the same file is not known here
            writeInfo(stream, info.getBytes());
        }
    }

    /**
     * Writes raw bytes followed by a single space. An {@link IOException} raised while writing is logged and not
     * propagated.
     *
     * @param stream the stream to write to
     * @param info the bytes to write; nothing is written when {@code null}
     */
    protected static void writeInfo(OutputStream stream, byte[] info) {
        if (info != null) {
            try {
                stream.write(info);
                stream.write(" ".getBytes());
            } catch (IOException e) {
                log.error(e, e);
            }
        }
    }

    /**
     * Extracts the text of a single message part.
     * <p>
     * A {@code text/plain} part is returned as is when its content is a {@link String}. Any other part is converted
     * with the converter registered in the {@link ConversionService} for its base type and {@code text/plain}.
     *
     * @param p the part to extract text from
     * @return the extracted bytes, or {@code null} when the part is a {@code text/plain} part whose content is not a
     *         string, when no converter is registered for its type, or when the conversion yields no blob
     * @throws MessagingException if the part cannot be read or its content type cannot be parsed
     * @throws IOException if the part content cannot be read
     */
    protected static byte[] extractTextFromMessagePart(Part p) throws MessagingException, IOException {
        ContentType contentType = new ContentType(p.getContentType());
        String baseType = contentType.getBaseType();
        if (TXT_MT.equals(baseType)) {
            Object content = p.getContent();
            if (content instanceof String) {
                return ((String) content).getBytes();
            }
            return null;
        }
        ConversionService cs = Framework.getLocalService(ConversionService.class);
        String converterName = cs.getConverterName(baseType, TXT_MT);
        if (converterName == null) {
            return null;
        }
        Blob blob;
        try (InputStream in = p.getInputStream()) {
            blob = Blobs.createBlob(in);
        }
        BlobHolder result = cs.convert(converterName, new SimpleBlobHolder(blob), null);
        // guard against a conversion that produced nothing instead of failing with a NullPointerException
        Blob converted = result == null ? null : result.getBlob();
        return converted == null ? null : converted.getByteArray();
    }

    /**
     * Recursively collects the leaf parts of the given part.
     * <ul>
     * <li>{@code message/rfc822}: the parts of the nested message are collected;</li>
     * <li>{@code multipart/alternative}: only one textual alternative is kept, the first {@code text/plain} one if
     * any (iteration stops there), otherwise the last {@code text/*} one seen; non textual sub parts encountered
     * are collected recursively;</li>
     * <li>other {@code multipart/*}: the parts of every body part are collected;</li>
     * <li>anything else: the part itself is returned.</li>
     * </ul>
     * A container part whose content is not exposed as a {@link Part} or {@link Multipart} is kept as a leaf instead
     * of triggering a {@link ClassCastException}.
     *
     * @param p the part to explore
     * @return the list of leaf parts, never {@code null}
     * @throws MessagingException if the structure of the part cannot be read
     * @throws IOException if the content of the part cannot be read
     */
    protected static List<Part> getAttachmentParts(Part p) throws MessagingException, IOException {
        List<Part> res = new ArrayList<>();
        if (p.isMimeType(MESSAGE_RFC822_MIMETYPE)) {
            Object content = p.getContent();
            if (content instanceof Part) {
                res.addAll(getAttachmentParts((Part) content));
            } else {
                res.add(p);
            }
        } else if (p.isMimeType("multipart/alternative")) {
            Object content = p.getContent();
            if (content instanceof Multipart) {
                // only return one of the text alternatives
                Multipart mp = (Multipart) content;
                int count = mp.getCount();
                Part alternativePart = null;
                for (int i = 0; i < count; i++) {
                    Part subPart = mp.getBodyPart(i);
                    if (subPart.isMimeType(TXT_MT)) {
                        alternativePart = subPart;
                        break;
                    } else if (subPart.isMimeType("text/*")) {
                        alternativePart = subPart;
                    } else {
                        res.addAll(getAttachmentParts(subPart));
                    }
                }
                if (alternativePart != null) {
                    res.add(alternativePart);
                }
            } else {
                res.add(p);
            }
        } else if (p.isMimeType("multipart/*")) {
            Object content = p.getContent();
            if (content instanceof Multipart) {
                Multipart mp = (Multipart) content;
                int count = mp.getCount();
                for (int i = 0; i < count; i++) {
                    res.addAll(getAttachmentParts(mp.getBodyPart(i)));
                }
            } else {
                res.add(p);
            }
        } else {
            res.add(p);
        }
        return res;
    }

    /**
     * Converts the blob of the given holder into its text rendition.
     *
     * @param blobHolder the holder of the message blob
     * @param parameters conversion parameters, not used by this converter
     * @return a holder wrapping the result of {@link #extractTextFromMessage(Blob)}, which may be {@code null}
     */
    @Override
    public BlobHolder convert(BlobHolder blobHolder, Map<String, Serializable> parameters) throws ConversionException {
        Blob inputBlob = blobHolder.getBlob();
        Blob outputBlob = extractTextFromMessage(inputBlob);
        return new SimpleCachableBlobHolder(outputBlob);
    }

    /**
     * Stores the descriptor this converter was registered with.
     *
     * @param descriptor the converter descriptor
     */
    @Override
    public void init(ConverterDescriptor descriptor) {
        this.descriptor = descriptor;
    }

}