/*
* (C) Copyright 2006-2009 Nuxeo SAS (http://nuxeo.com/) and contributors.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the GNU Lesser General Public License
* (LGPL) version 2.1 which accompanies this distribution, and is available at
* http://www.gnu.org/licenses/lgpl.html
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* Contributors:
* Nuxeo - initial API and implementation
*
* $Id$
*/
package org.nuxeo.ecm.core.convert.plugins.text.extractors;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import javax.mail.Address;
import javax.mail.Multipart;
import javax.mail.Part;
import javax.mail.Session;
import javax.mail.Message.RecipientType;
import javax.mail.internet.ContentType;
import javax.mail.internet.MimeMessage;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.core.api.blobholder.BlobHolder;
import org.nuxeo.ecm.core.api.blobholder.SimpleBlobHolder;
import org.nuxeo.ecm.core.api.impl.blob.FileBlob;
import org.nuxeo.ecm.core.convert.api.ConversionException;
import org.nuxeo.ecm.core.convert.api.ConversionService;
import org.nuxeo.ecm.core.convert.cache.SimpleCachableBlobHolder;
import org.nuxeo.ecm.core.convert.extension.Converter;
import org.nuxeo.ecm.core.convert.extension.ConverterDescriptor;
import org.nuxeo.runtime.api.Framework;
public class RFC822ToTextConverter implements Converter {
private static final Log log = LogFactory.getLog(RFC822ToTextConverter.class);
private static final String MESSAGE_RFC822_MIMETYPE = "message/rfc822";
private static final String TXT_MT = "text/plain";
protected ConverterDescriptor descriptor;
protected Blob extractTextFromMessage(Blob blob) {
if (blob == null) {
return null;
}
File f = null;
OutputStream fo = null;
try {
MimeMessage msg = new MimeMessage((Session) null,
blob.getStream());
f = File.createTempFile("rfc822totext", ".txt");
fo = new FileOutputStream(f);
List<Part> parts = getAttachmentParts(msg);
writeInfo(fo, msg.getSubject());
writeInfo(fo, msg.getFrom());
writeInfo(fo, msg.getRecipients(RecipientType.TO));
writeInfo(fo, msg.getRecipients(RecipientType.CC));
for (Part part : parts) {
writeInfo(fo, part.getFileName());
writeInfo(fo, part.getDescription());
byte[] extracted = extractTextFromMessagePart(part);
if (extracted != null) {
writeInfo(fo, extracted);
}
}
Blob outblob = new FileBlob(new FileInputStream(f));
outblob.setMimeType(descriptor.getDestinationMimeType());
return outblob;
} catch (Exception e) {
log.error(e);
} finally {
if (fo != null) {
try {
fo.close();
} catch (IOException e) {
log.error(e);
}
}
if (f != null) {
f.delete();
}
}
return null;
}
protected static void writeInfo(OutputStream stream, Address address) {
if (address != null) {
try {
stream.write(address.toString().getBytes());
stream.write(" ".getBytes());
} catch (Exception e) {
log.error(e);
}
}
}
protected static void writeInfo(OutputStream stream, Address[] addresses) {
if (addresses != null) {
for (Address address : addresses) {
writeInfo(stream, address);
}
}
}
protected static void writeInfo(OutputStream stream, String info) {
if (info != null) {
try {
stream.write(info.getBytes());
stream.write(" ".getBytes());
} catch (Exception e) {
log.error(e);
}
}
}
protected static void writeInfo(OutputStream stream, byte[] info) {
if (info != null) {
try {
stream.write(info);
stream.write(" ".getBytes());
} catch (Exception e) {
log.error(e);
}
}
}
protected static byte[] extractTextFromMessagePart(Part p) throws Exception {
ContentType contentType = new ContentType(p.getContentType());
String baseType = contentType.getBaseType();
if (TXT_MT.equals(baseType)) {
Object content = p.getContent();
if (content instanceof String) {
return ((String) content).getBytes();
} else {
return null;
}
}
ConversionService cs = Framework.getLocalService(ConversionService.class);
String converterName = cs.getConverterName(baseType, TXT_MT);
if (converterName == null) {
return null;
} else {
BlobHolder result = cs.convert(converterName,
new SimpleBlobHolder(new FileBlob(p.getInputStream())), null);
return result.getBlob().getByteArray();
}
}
protected static List<Part> getAttachmentParts(Part p) throws Exception {
List<Part> res = new ArrayList<Part>();
if (p.isMimeType(MESSAGE_RFC822_MIMETYPE)) {
res.addAll(getAttachmentParts((Part) p.getContent()));
} else if (p.isMimeType("multipart/alternative")) {
// only return one of the text alternatives
Multipart mp = (Multipart) p.getContent();
int count = mp.getCount();
Part alternativePart = null;
for (int i = 0; i < count; i++) {
Part subPart = mp.getBodyPart(i);
if (subPart.isMimeType(TXT_MT)) {
alternativePart = subPart;
break;
} else if (subPart.isMimeType("text/*")) {
alternativePart = subPart;
} else {
res.addAll(getAttachmentParts(subPart));
}
}
if (alternativePart != null) {
res.add(alternativePart);
}
} else if (p.isMimeType("multipart/*")) {
Multipart mp = (Multipart) p.getContent();
int count = mp.getCount();
for (int i = 0; i < count; i++) {
res.addAll(getAttachmentParts(mp.getBodyPart(i)));
}
} else {
res.add(p);
}
return res;
}
@Override
public BlobHolder convert(BlobHolder blobHolder,
Map<String, Serializable> parameters) throws ConversionException {
Blob inputBlob;
try {
inputBlob = blobHolder.getBlob();
} catch (Exception e) {
throw new ConversionException("Error while getting blob from Holder", e);
}
Blob outputBlob;
try {
outputBlob = extractTextFromMessage(inputBlob);
} catch (Exception e) {
throw new ConversionException("Error in Text conversion", e);
}
return new SimpleCachableBlobHolder(outputBlob);
}
@Override
public void init(ConverterDescriptor descriptor) {
this.descriptor = descriptor;
}
}