package edu.isi.bmkeg;
import java.io.File;
import java.net.URL;
import junit.framework.TestCase;
import org.junit.Test;
import edu.isi.bmkeg.lapdf.bin.CommandLineTool;
import edu.isi.bmkeg.utils.Converters;
public class CommandLineToolTest extends TestCase
{
private File plos8_8_dir;
private File plos8_8_dir_out;
private File epoch_7Jun_8_drl;
private File epoch_7Jun_8_csv;
protected void setUp() throws Exception
{
super.setUp();
URL url = this.getClass().getClassLoader().getResource(
"sampleData/plos/8_8"
);
plos8_8_dir = new File( url.getPath() );
url = this.getClass().getClassLoader().getResource(
"sampleData/plos/8_8_OUTPUT"
);
plos8_8_dir_out = new File( url.getPath() );
// empty output directory.
Converters.recursivelyDeleteFiles(plos8_8_dir_out);
plos8_8_dir_out.mkdir();
url = this.getClass().getClassLoader().getResource(
"rules/plosbiology/epoch_7Jun_8.drl"
);
epoch_7Jun_8_drl = new File( url.getPath() );
url = this.getClass().getClassLoader().getResource(
"rules/plosbiology/epoch_7Jun_8.csv"
);
epoch_7Jun_8_csv = new File( url.getPath() );
}
protected void tearDown() throws Exception
{
super.tearDown();
Converters.cleanContentsFiles(plos8_8_dir, "pdf");
}
/**
* This test is designed to demonstrate the capability of LA-PDFText to
* - Extract contiguous blocks from an input PDF file
* - generate a report file for each input PDF containing page-wise statistics
* of each block detected. This is meant as a guide for developers to use in
* the process of developing rules for block classification and evantual
* section-wise text extraction.
*/
@Test
public void testBlockStats()
{
String args[] = {
"blockStatistics",
plos8_8_dir.getPath()
};
CommandLineTool.main(args);
}
/**
* This test is designed to demonstrate the capability of LA-PDFText to
* - Extract contiguous blocks from an input PDF file. In this test the output location
* is unspecified and therefore output is written to the input folder.
*
*/
@Test
public void test1()
{
String args[] = {
"blockify",
plos8_8_dir.getPath()
};
CommandLineTool.main(args);
}
/**
* This test is designed to demonstrate the capability of LA-PDFText to
* - Extract contiguous blocks from an input PDF file. In this test the output location
* is specified.
*
*/
@Test
public void test2()
{
String args[] = {
"blockify",
plos8_8_dir.getPath(),
plos8_8_dir_out.getPath()
};
CommandLineTool.main(args);
}
/**
* This test is designed to demonstrate the capability of LA-PDFText to
* - Extract contiguous blocks from an input PDF file. In this test the output location
* is unspecified and therefore output is written to the input folder.
* - Classify the extracted blocks into their corresponding sections.
* The types of sections that are supported are listed in the
* java interface edu.isi.bmkeg.pdf.model.Block
*/
@Test
public void test3()
{
String args[] = {
"blockifyClassify",
plos8_8_dir.getPath(),
epoch_7Jun_8_drl.getPath(),
plos8_8_dir_out.getPath()
};
CommandLineTool.main(args);
}
/**
* This test is designed to demonstrate the capability of LA-PDFText to
* - Extract contiguous blocks from an input PDF file. In this test the output location
* is unspecified and therefore output is written to the input folder.
* - Classify the extracted blocks into their corresponding sections.
* The types of sections that are supported are listed in the
* java interface edu.isi.bmkeg.pdf.model.Block
* - The ability of LA-PDFText to accept classification rules written as an excel spreadsheet
*/
@Test
public void test3Excel()
{
String args[] = {
"blockifyClassify",
plos8_8_dir.getPath(),
epoch_7Jun_8_csv.getPath(),
plos8_8_dir_out.getPath()
};
CommandLineTool.main(args);
}
/**
* This test is designed to demonstrate the capability of LA-PDFText to
* - Extract contiguous blocks from an input PDF file. In this test the output location
* is unspecified and therefore output is written to the input folder.
* - Classify the extracted blocks into their corresponding sections.
* The types of sections that are supported are listed in the
* java interface edu.isi.bmkeg.pdf.model.Block
* - The ability of LA-PDFText to extract the text in plain text from by using the
* section classifications to filter out those sections that are not a part of the
* main narrative of the input article.
*/
@Test
public void test4()
{
String args[] = {
"extractFullText",
plos8_8_dir.getPath(),
epoch_7Jun_8_csv.getPath(),
plos8_8_dir_out.getPath()
};
CommandLineTool.main(args);
}
/**
* This test is designed to demonstrate the capability of LA-PDFText to
* error check the input modes of operation. Current version does not
* support the extraction of individual sections.
*
@Test
public void test5()
{
String args[] = {"extractSection",
"src/test/resources/sampleData/plos/8_8_OUTPUT/pbio.1000441.pdf_rhetorical.xml",
"src/test/resources/sampleData/plos/8_8_OUTPUT/pbio.1000441.pdf_rhetorical.methods",
"materials|methods"};
CommandLineTool.main(args);
}*/
/**
* This test is designed to demonstrate the capability of LA-PDFText to
* - Extract contiguous blocks from an input PDF file. In this test the output location
* is unspecified and therefore output is written to the input folder.
* - Classify the extracted blocks into their corresponding sections.
* The types of sections that are supported are listed in the
* java interface edu.isi.bmkeg.pdf.model.Block
* - The ability of LA-PDFText to extract the text in plain text from by using the
* section classifications to filter out those sections that are not a part of the
* main narrative of the input article.
* - The rule file used here is a generic journal and publisher format-agnostic
* rule file which identifies the page footers and headers only. Subsequently,
* the class edu.isi.bmkeg.pdf.text.SpatiallyOrderedChunkTextWriter is used to
* filter out the header and footer to write text that is not interrupted by
* their formatting embellishments.
*
@Test
public void testGeneral()
{
String args[] = {"blockifyClassify",
"/Users/cartic/Desktop/temp/bloodOriginal/new",
"src/main/resources/rules/plosbiology/general.drl"};
CommandLineTool.main(args);
}*/
}