package edu.isi.bmkeg.lapdf.bin;
import java.io.File;
import java.net.URL;
import edu.isi.bmkeg.lapdf.uima.cpe.CommandLineFitPipeline;
/**
 * Deprecated command-line front end that builds a {@link CommandLineFitPipeline}
 * from positional arguments and runs it.
 *
 * <p>The first argument selects the operation ({@code blockify},
 * {@code blockStatistics}, {@code blockifyClassify} or {@code extractFullText});
 * the remaining arguments give the input folder, an optional rule file and an
 * optional output folder. When no output folder is given, the input folder is
 * used as the output folder. Run without arguments to print the usage text.
 */
@Deprecated
public class CommandLineTool
{
    private static final String OPERATION_PDFEX_BLOCKIFY = "blockify";
    private static final String OPERATION_PDFEX_BLOCK_STATISTICS = "blockStatistics";
    private static final String OPERATION_PDFEX_BLOCKIFY_CLASSIFY = "blockifyClassify";
    private static final String OPERATION_FILTERED_LAYOUTAWARE_FULLTEXT_EXTRACTION = "extractFullText";

    /** Classpath resource used as the rule file when extractFullText is given none. */
    private static final String DEFAULT_RULE_RESOURCE = "rules/general.drl";

    /**
     * Program entry point.
     *
     * <p>Any usage error (no arguments, unknown operation, wrong number of
     * arguments) prints the usage text and terminates the JVM with exit status 1.
     * A failure while constructing the pipeline additionally prints the stack
     * trace before doing the same.
     *
     * @param args operation name followed by its positional arguments; see
     *             {@link #printUsage()}
     */
    public static void main(String args[])
    {
        if (args.length == 0) {
            exitWithUsage();
            return;
        }

        CommandLineFitPipeline pipeline;
        try {
            pipeline = createPipeline(args[0], args);
        } catch (Exception e) {
            e.printStackTrace();
            // A pipeline that could not be built is a failure, so report it
            // through the exit status instead of silently returning 0.
            exitWithUsage();
            return;
        }

        if (pipeline == null) {
            // Unknown operation or wrong argument count.
            exitWithUsage();
            return;
        }
        pipeline.run();
    }

    /**
     * Builds the pipeline for the requested operation.
     *
     * @param operationType the operation name (first command-line argument)
     * @param args          the complete command-line argument array
     * @return the configured pipeline, or {@code null} if the operation is
     *         unknown or the number of arguments does not fit the operation
     * @throws Exception if the pipeline constructor fails or the default rule
     *                   file cannot be located
     */
    private static CommandLineFitPipeline createPipeline(String operationType, String[] args)
            throws Exception
    {
        boolean blockify = operationType.equals(OPERATION_PDFEX_BLOCKIFY);
        boolean blockStatistics = operationType.equals(OPERATION_PDFEX_BLOCK_STATISTICS);

        if (blockify || blockStatistics) {
            if (args.length != 2 && args.length != 3) {
                return null;
            }
            String inputFolder = args[1];
            String outputFolder = (args.length == 3) ? args[2] : inputFolder;
            // The two modes differ only in the second boolean flag, which is
            // true for blockStatistics (presumably "report blocks" - confirm
            // against CommandLineFitPipeline).
            return new CommandLineFitPipeline(inputFolder, true, blockStatistics, outputFolder);
        }

        if (operationType.equals(OPERATION_PDFEX_BLOCKIFY_CLASSIFY)) {
            if (args.length != 3 && args.length != 4) {
                return null;
            }
            String inputFolder = args[1];
            String ruleFileLocation = args[2];
            String outputFolder = (args.length == 4) ? args[3] : inputFolder;
            return new CommandLineFitPipeline(inputFolder, ruleFileLocation, false, true, outputFolder);
        }

        if (operationType.equals(OPERATION_FILTERED_LAYOUTAWARE_FULLTEXT_EXTRACTION)) {
            if (args.length < 2 || args.length > 4) {
                return null;
            }
            String inputFolder = args[1];
            String ruleFileLocation = (args.length >= 3) ? args[2] : defaultRuleFileLocation();
            String outputFolder = (args.length == 4) ? args[3] : inputFolder;
            // In this mode, since we have a rule file, extractUnclassified and
            // report blocks are both set to false.
            return new CommandLineFitPipeline(inputFolder, ruleFileLocation, false, false, outputFolder);
        }

        return null;
    }

    /**
     * Resolves the bundled default rule file to a file-system path.
     *
     * @return the path of the default rule file
     * @throws IllegalStateException if the resource is not on the classpath
     */
    private static String defaultRuleFileLocation()
    {
        URL resource = CommandLineFitPipeline.class.getClassLoader().getResource(DEFAULT_RULE_RESOURCE);
        if (resource == null) {
            throw new IllegalStateException(
                    "Default rule file '" + DEFAULT_RULE_RESOURCE + "' was not found on the classpath");
        }
        if ("file".equals(resource.getProtocol())) {
            try {
                // URL.getPath() is percent-encoded (e.g. "%20" for a space);
                // going through the URI yields the decoded file-system path.
                return new File(resource.toURI()).getPath();
            } catch (java.net.URISyntaxException e) {
                // Fall through to the raw URL path below.
            } catch (IllegalArgumentException e) {
                // URI not usable as a local file; fall through.
            }
        }
        return new File(resource.getPath()).getPath();
    }

    /** Prints the usage text and terminates the JVM with exit status 1. */
    private static void exitWithUsage()
    {
        printUsage();
        System.exit(1);
    }

    /**
     * Prints the usage guidelines for all supported operations to standard output.
     */
    public static void printUsage() {
        System.out.println("Usage Guidelines");
        System.out.println("1. Blockifying PDF: Use this option if you want to blockify the PDF and output the blocks XML");
        System.out.println("Usage\nArgument 1:" + OPERATION_PDFEX_BLOCKIFY + "\nArgument 2: The directory path where the PDFs are located \nArgument 3[Optional]: The directory path where output of blockify will be placed");
        System.out.println("\n2. Blockifying PDF and reporting Features: Use this option if you want to blockify the PDF and output the blocks XML and generate a report file that serves as a guide in crafting a rule file for the sectionify step.");
        System.out.println("Usage\nArgument 1:" + OPERATION_PDFEX_BLOCK_STATISTICS + "\nArgument 2: The directory path where the PDFs are located \nArgument 3[Optional]: The directory path where output of blockify and the feature reports will be placed");
        System.out.println("\n3. Blockifying and sectionifying PDF: Use this option if you want to blockify the PDF and do rhetorical classification of the blocks.It will output an openAccess based XML");
        System.out.println("Usage\nArgument 1:" + OPERATION_PDFEX_BLOCKIFY_CLASSIFY + "\nArgument 2: The directory path where the PDFs are located\nArgument 3: The path of the rule file for Drools \nArgument 4[Optional]: The directory path where output of blockify and sectionify will be placed");
        System.out.println("\n4. Extracting full text from PDF: Use this argument if you want to extract particular section from the openAccess based XML");
        // The rule file and the output directory are both optional for this
        // operation; the usage text now reflects what main() accepts.
        System.out.println("Usage\nArgument 1:" + OPERATION_FILTERED_LAYOUTAWARE_FULLTEXT_EXTRACTION + "\nArgument 2: The directory path where the PDFs are located\nArgument 3[Optional]: The path of the rule file for Drools (defaults to the bundled " + DEFAULT_RULE_RESOURCE + ")\nArgument 4[Optional]: The directory path where the output will be placed");
        // Note: a usage entry for a separate section-extraction operation was
        // previously commented out here; that operation is not offered by this tool.
    }
}