package org.elasticsearch.mapper.attachments;
import java.io.IOException;
import java.security.AccessController;
import java.security.PrivilegedActionException;
import java.security.PrivilegedExceptionAction;
import org.apache.tika.Tika;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.Parser;
import org.elasticsearch.SpecialPermission;
import org.elasticsearch.common.io.stream.StreamInput;
/** do NOT make public */
final class TikaImpl {
/** subset of parsers for types we support */
private static final Parser PARSERS[] = new Parser[] {
// documents
new org.apache.tika.parser.html.HtmlParser(),
new org.apache.tika.parser.rtf.RTFParser(),
new org.apache.tika.parser.pdf.PDFParser(),
new org.apache.tika.parser.txt.TXTParser(),
new org.apache.tika.parser.microsoft.OfficeParser(),
new org.apache.tika.parser.microsoft.OldExcelParser(),
new org.apache.tika.parser.microsoft.ooxml.OOXMLParser(),
new org.apache.tika.parser.odf.OpenDocumentParser(),
new org.apache.tika.parser.iwork.IWorkPackageParser(),
new org.apache.tika.parser.xml.DcXMLParser(),
};
/** autodetector based on this subset */
private static final AutoDetectParser PARSER_INSTANCE = new AutoDetectParser(PARSERS);
/** singleton tika instance */
private static final Tika TIKA_INSTANCE = new Tika(PARSER_INSTANCE.getDetector(), PARSER_INSTANCE);
/**
* parses with tika, throwing any exception hit while parsing the document
*/
// only package private for testing!
static String parse(final byte content[], final Metadata metadata, final int limit) throws TikaException, IOException {
// check that its not unprivileged code like a script
SecurityManager sm = System.getSecurityManager();
if (sm != null) {
sm.checkPermission(new SpecialPermission());
}
try {
return AccessController.doPrivileged(new PrivilegedExceptionAction<String>() {
@Override
public String run() throws TikaException, IOException {
return TIKA_INSTANCE.parseToString(StreamInput.wrap(content), metadata, limit);
}
});
} catch (PrivilegedActionException e) {
// checked exception from tika: unbox it
Throwable cause = e.getCause();
if (cause instanceof TikaException) {
throw (TikaException) cause;
} else if (cause instanceof IOException) {
throw (IOException) cause;
} else {
throw new AssertionError(cause);
}
}
}
}