package play.modules.search.store; import java.util.ArrayList; import java.util.List; import play.Logger; import play.Play; import play.classloading.ApplicationClasses.ApplicationClass; import play.db.jpa.Blob; import play.modules.search.store.extractors.TextExtractor; import play.modules.search.store.mime.ExtensionGuesser; import play.modules.search.store.mime.MimeGuesser; /** * This class performs Full text extraction from various * file formats * @author jfp */ public class FileExtractor { public static List<TextExtractor> extractors = new ArrayList<TextExtractor>(); public static MimeGuesser mimeGuesser = new ExtensionGuesser(); public static void init() { Logger.debug("init FileExtractor"); List<ApplicationClass> classes = Play.classes.getAssignableClasses(TextExtractor.class); List<TextExtractor> extractors = new ArrayList<TextExtractor>(); for (ApplicationClass applicationClass : classes) { try { Logger.trace("adding %s as a TextExtractor", applicationClass.name); extractors.add((TextExtractor) applicationClass.javaClass.newInstance()); } catch (Exception e) { Logger.warn(e,"Could not instanciate text extractor %s",applicationClass.javaClass.getName()); } } FileExtractor.extractors = extractors; } public static String getText (Blob blob) { // Guess mime String mime = mimeGuesser.guess (blob); // Invoke the handlers String fileName = blob.getFile().getName(); for (TextExtractor extractor : extractors) { if (extractor.handles(mime)) { Logger.debug ("Using %s extractor to handle blob %s, mime=%s", extractor.getClass().getName(), fileName, blob.type()); return extractor.extract(blob); } } Logger.warn("No handlers able to index %s mime type, file was %s", mime, fileName); return null; } }