/*
 * Copyright 2000-2011 Enonic AS
 * http://www.enonic.com/license
 */
package com.enonic.cms.plugin.extractor;

import java.io.IOException;
import java.io.InputStream;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;

import com.enonic.cms.api.plugin.ext.TextExtractor;

/**
 * Text extractor plugin that pulls the plain-text content out of PDF documents using Apache PDFBox.
 */
public class PdfExtractor
    extends TextExtractor
{
    /** The only MIME type this extractor accepts. */
    private static final String PDF_MIME_TYPE = "application/pdf";

    /**
     * Tells whether this extractor supports the given MIME type.
     *
     * @param mimeType the MIME type to check; may be {@code null}
     * @return {@code true} only when {@code mimeType} is exactly {@code "application/pdf"}
     */
    @Override
    public boolean canHandle( String mimeType )
    {
        // Constant-first equals keeps a null mimeType from throwing; it simply yields false.
        return PDF_MIME_TYPE.equals( mimeType );
    }

    /**
     * Extracts the text of a PDF document read from the given stream.
     *
     * @param mimeType    the MIME type of the content in {@code inputStream}
     * @param inputStream the stream holding the PDF data; it is not closed by this method
     * @param encoding    not used by this implementation
     * @return the extracted text, or {@code null} when {@code mimeType} is not supported
     * @throws IOException if the document cannot be loaded or its text cannot be extracted
     */
    @Override
    public String extractText( String mimeType, InputStream inputStream, String encoding )
        throws IOException
    {
        if ( !canHandle( mimeType ) )
        {
            return null;
        }

        PDDocument doc = PDDocument.load( inputStream );
        try
        {
            PDFTextStripper stripper = new PDFTextStripper();
            return stripper.getText( doc );
        }
        finally
        {
            // Release the document even when text extraction fails, so a bad PDF does not leak resources.
            doc.close();
        }
    }
}