/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.app.mediafilter;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import org.apache.commons.io.IOUtils;
import org.apache.poi.POITextExtractor;
import org.apache.poi.extractor.ExtractorFactory;
import org.apache.poi.hssf.extractor.ExcelExtractor;
import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
import org.apache.log4j.Logger;
import org.dspace.content.Item;
/*
* ExcelFilter
*
* Entries you must add to dspace.cfg:
*
* filter.plugins = blah, \
* Excel Text Extractor
*
* plugin.named.org.dspace.app.mediafilter.FormatFilter = \
* blah = blah, \
* org.dspace.app.mediafilter.ExcelFilter = Excel Text Extractor
*
* #Configure each filter's input Formats
* filter.org.dspace.app.mediafilter.ExcelFilter.inputFormats = Microsoft Excel, Microsoft Excel XML
*
*/
public class ExcelFilter extends MediaFilter
{
private static Logger log = Logger.getLogger(ExcelFilter.class);
public String getFilteredName(String oldFilename)
{
return oldFilename + ".txt";
}
/**
* @return String bundle name
*
*/
public String getBundleName()
{
return "TEXT";
}
/**
* @return String bitstream format
*
*
*/
public String getFormatString()
{
return "Text";
}
/**
* @return String description
*/
public String getDescription()
{
return "Extracted text";
}
/**
* @param item item
* @param source source input stream
* @param verbose verbose mode
*
* @return InputStream the resulting input stream
* @throws Exception if error
*/
@Override
public InputStream getDestinationStream(Item item, InputStream source, boolean verbose)
throws Exception
{
String extractedText = null;
try
{
POITextExtractor theExtractor = ExtractorFactory.createExtractor(source);
if (theExtractor instanceof ExcelExtractor)
{
// for xls file
extractedText = (theExtractor).getText();
}
else if (theExtractor instanceof XSSFExcelExtractor)
{
// for xlsx file
extractedText = (theExtractor).getText();
}
}
catch (Exception e)
{
log.error("Error filtering bitstream: " + e.getMessage(), e);
throw e;
}
if (extractedText != null)
{
// generate an input stream with the extracted text
return IOUtils.toInputStream(extractedText, StandardCharsets.UTF_8);
}
return null;
}
}