/*
* Copyright 2000-2013 Enonic AS
* http://www.enonic.com/license
*/
package com.enonic.cms.api.plugin.ext;
import java.io.IOException;
import java.io.InputStream;
/**
 * Base class for text extractor plugins.
 * <p>
 * An implementation announces the mime-types it supports through {@link #canHandle(String)} and converts a binary
 * document into plain text through {@link #extractText(String, InputStream, String)}.
 */
public abstract class TextExtractor extends ExtensionBase
{
    /**
     * Tells whether documents of the given mime-type can be indexed by this plugin.
     *
     * @param mimeType the mime-type to check.
     * @return {@code true} when this implementation can handle a file with the given mime-type,
     *         {@code false} otherwise.
     */
    public abstract boolean canHandle( String mimeType );

    /**
     * Pulls all the text out of a binary document.
     *
     * @param mimeType the mime-type of the document.
     * @param stream   an {@link InputStream} connected to the binary document the text is extracted from.
     * @param encoding the character encoding to use.
     * @return the pure text contained in the document.
     * @throws IOException if there are problems reading the input stream.
     */
    public abstract String extractText( String mimeType, InputStream stream, String encoding )
        throws IOException;
}