// Copyright (c) 2004-2005 ASERT. Released under the Canoo Webtest license.
package com.canoo.webtest.plugins.pdftest;
import org.apache.log4j.Logger;
import com.canoo.webtest.engine.ContextHelper;
import com.canoo.webtest.plugins.pdftest.htmlunit.PDFPage;
/**
 * Extracts all text content from within the current PDF document and
 * defines the extracted text as the current response (content type
 * <code>text/plain</code>).
 *
 * <p>The separators placed between fragments, lines and pages are configurable.
 * Only the page separator has the escape sequences <code>\n</code> and
 * <code>\r</code> translated into the corresponding control characters;
 * the fragment and line separators are passed on exactly as configured.</p>
 *
 * @author Paul King
 * @author Marc Guillemot
 * @webtest.step category="PDF"
 * name="pdfToTextFilter"
 * description="Extracts all text content from within the current <key>pdf</key> document."
 */
public class PdfToTextFilter extends AbstractPdfFilter
{
    private static final Logger LOG = Logger.getLogger(PdfToTextFilter.class);

    /** Page separator applied when none has been configured. */
    private static final String DEFAULT_PAGE_SEP = "\n[+++ NEW PAGE +++]\n";

    private String fMode = PDFPage.MODE_NORMAL;
    private String fFragSep = " ";
    private String fLineSep = "\n";
    private String fPageSep;

    /**
     * Returns the page separator exactly as it was configured.
     *
     * @return the configured page separator, or <code>null</code> if none was set
     *         (the default is then applied when the filter runs)
     */
    public String getPageSep() {
        return fPageSep;
    }

    /**
     * @param pageSep
     * @webtest.parameter
     * required="no"
     * default="\n[+++ NEW PAGE +++]\n"
     * description="The page separator string to use, e.g. \"\n\" or \"------\"."
     */
    public void setPageSep(final String pageSep) {
        fPageSep = pageSep;
    }

    /**
     * Returns the line separator handed to the text extraction.
     *
     * @return the line separator; a single newline character unless configured otherwise
     */
    public String getLineSep() {
        return fLineSep;
    }

    /**
     * @param lineSep
     * @webtest.parameter
     * required="no"
     * default="a newline character"
     * description="The line separator string to use, e.g. \" \" or \"\n\"."
     */
    public void setLineSep(final String lineSep) {
        fLineSep = lineSep;
    }

    /**
     * Returns the fragment separator handed to the text extraction.
     *
     * @return the fragment separator; a single space unless configured otherwise
     */
    public String getFragSep() {
        return fFragSep;
    }

    /**
     * @param fragSep
     * @webtest.parameter
     * required="no"
     * default="a single space"
     * description="The fragment separator string to use, e.g. \"\" or \" \" or \",\" or \" | \". Only used if <em>mode</em> is \"<em>groupByLines</em>\"."
     */
    public void setFragSep(final String fragSep) {
        fFragSep = fragSep;
    }

    /**
     * Returns the configured mode. The value is only validated; the extraction
     * itself always runs with {@link PDFPage#MODE_NORMAL}.
     *
     * @return the mode as configured
     */
    public String getMode() {
        return fMode;
    }

    /**
     * @param mode
     * @webtest.parameter
     * required="no"
     * default="normal"
     * description="Deprecated: doesn't do anything anymore."
     */
    public void setMode(final String mode) {
        if (PDFPage.MODE_LINES.equals(mode)) {
            LOG.warn("mode='" + PDFPage.MODE_LINES + "' is not supported anymore. Using mode='normal'.");
        }
        // The value is still stored so that verifyParameters() can reject unknown modes.
        fMode = mode;
    }

    /**
     * Runs the inherited parameter checks and then hands
     * <code>paramCheck</code> a flag that is true when the configured mode is
     * neither {@link PDFPage#MODE_NORMAL} nor {@link PDFPage#MODE_LINES}.
     */
    protected void verifyParameters() {
        super.verifyParameters();
        final String mode = getMode();
        final boolean unknownMode = !PDFPage.MODE_NORMAL.equals(mode) && !PDFPage.MODE_LINES.equals(mode);
        paramCheck(unknownMode, "Invalid mode");
    }

    /**
     * Extracts the text of the given PDF page object and defines it as the
     * current response with content type <code>text/plain</code>.
     *
     * @param pdfPage the PDF to extract the text from
     */
    protected void doFilter(final PDFPage pdfPage)
    {
        // The mode is deliberately fixed: the "lines" mode is no longer supported.
        final String text = pdfPage.getText(getFragSep(), getLineSep(), resolvePageSep(), PDFPage.MODE_NORMAL);
        // Guard the message construction: the extracted text may be large.
        if (LOG.isDebugEnabled()) {
            LOG.debug("Filter result: >" + text + "<");
        }
        ContextHelper.defineAsCurrentResponse(getContext(), text, "text/plain", getClass());
    }

    /**
     * Computes the page separator to use for one filter run without touching
     * the configured value, so that {@link #getPageSep()} keeps returning what
     * was configured.
     *
     * @return the default separator if none was configured, otherwise the configured
     *         one with the two-character sequences <code>\n</code> and <code>\r</code>
     *         replaced by a newline and a carriage return respectively
     */
    private String resolvePageSep() {
        final String configured = getPageSep();
        if (configured == null) {
            return DEFAULT_PAGE_SEP;
        }
        // The regex "\\\\n" matches a literal backslash followed by 'n' (same for 'r').
        return configured.replaceAll("\\\\n", "\n").replaceAll("\\\\r", "\r");
    }
}