package edu.isi.bmkeg.lapdf.bin; import java.io.File; import java.net.URL; import java.util.ArrayList; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import java.util.regex.Pattern; import edu.isi.bmkeg.lapdf.controller.LapdfEngine; import edu.isi.bmkeg.lapdf.controller.LapdfMode; import edu.isi.bmkeg.lapdf.model.Block; import edu.isi.bmkeg.lapdf.model.LapdfDocument; import edu.isi.bmkeg.lapdf.uima.cpe.CommandLineFitPipeline; import edu.isi.bmkeg.lapdf.utils.PdfDirWatcher; import edu.isi.bmkeg.utils.Converters; public class WatchDirectory { public static String USAGE = "usage: <COMMAND> <dir-to-be-watched> <output-dir> [<rule-file>]\n\n" + "<COMMAND> - the command to be executed: \n" + " - " + PdfDirWatcher.IMAGIFY_BLOCKS + "\n" + " - " + PdfDirWatcher.IMAGIFY_SECTIONS + "\n" + " - " + PdfDirWatcher.BLOCKIFY + "\n" + " - " + PdfDirWatcher.BLOCKIFY_CLASSIFY + "\n" + " - " + PdfDirWatcher.READ_SECTION_TEXT + "\n" + "<dir-to-be-watched> - the full path to the directory to be watched \n" + "<output-dir> (optional or '-') - the full path to the output directory \n" + "<rule-file> (optional or '-') - the full path to the rule file \n\n" + "This program maintains a watcher on this directory to execute the \n" + "denoted command on any PDF files added to the directory. \n" + "The system will then delete the appropriate files and folders\n" + "when the originating PDF file is removed.\n"; public static void main(String args[]) throws Exception { if (args.length < 3 ) { System.err.println(USAGE); System.exit(1); } String type = args[0]; String inputDirPath = args[1]; String outputDirPath = ""; String ruleFilePath = ""; File inputDir = new File( inputDirPath ); if( !inputDir.exists() ) { System.err.println(USAGE); System.err.println("Input file / dir '" + inputDirPath + "' does not exist."); System.err.println("Please include full path"); System.exit(1); } // output folder must not be contained in the input set. outputDirPath = args[2]; File outDir = new File( outputDirPath ); if( !outDir.exists() ) { outDir.mkdir(); } File temp = new File(outDir.getPath()); while( temp.getParentFile() != null ) { if( temp.equals(inputDir) ) { System.err.println(USAGE); System.err.println(outputDirPath + " cannot be inside " + inputDirPath); System.exit(1); } temp = temp.getParentFile(); } // output folder is set. File ruleFile = null; if ( args.length > 3 ) { ruleFilePath = args[3]; } else { ruleFilePath = "-"; } if( ruleFilePath.equals( "-" ) ) { ruleFile = Converters.extractFileFromJarClasspath("rules/general.drl"); } else { ruleFile = new File( ruleFilePath ); } if( !ruleFile.exists() ) { System.err.println(USAGE); System.err.println(ruleFilePath + " does not exist."); System.err.println("Please include full path"); System.exit(1); } // then run the watcher to sit around and wait for changes to the directory. PdfDirWatcher p = new PdfDirWatcher(type, inputDir, outDir, ruleFile); // // start by running the text extraction pipeline over the folders. // Pattern patt = Pattern.compile("\\.pdf$"); Map<String, File> inputFiles = Converters.recursivelyListFiles( inputDir, patt); Iterator<String> it = inputFiles.keySet().iterator(); while (it.hasNext()) { String key = it.next(); File pdf = inputFiles.get(key); p.execute(pdf); } p.setUpLiveFolder(); p.run(); } }