package edu.isi.bmkeg.lapdf.utils;
import java.io.File;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Timer;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import edu.isi.bmkeg.lapdf.bin.CommandLineTool;
import edu.isi.bmkeg.lapdf.bin.WatchDirectory;
import edu.isi.bmkeg.lapdf.controller.LapdfEngine;
import edu.isi.bmkeg.lapdf.controller.LapdfMode;
import edu.isi.bmkeg.lapdf.model.Block;
import edu.isi.bmkeg.lapdf.model.LapdfDocument;
import edu.isi.bmkeg.lapdf.uima.cpe.CommandLineFitPipeline;
import edu.isi.bmkeg.utils.Converters;
import edu.isi.bmkeg.utils.parser.DirWatcher;
/**
 * A {@link DirWatcher} for ".pdf" files that runs one configured LAPDF
 * operation on every file reported as "added" and removes the derived output
 * when a file is reported as "deleted".
 *
 * <p>The operation is selected by the {@code type} string given to the
 * constructor; it must be one of {@link #IMAGIFY_BLOCKS},
 * {@link #IMAGIFY_SECTIONS}, {@link #BLOCKIFY}, {@link #BLOCKIFY_CLASSIFY} or
 * {@link #READ_SECTION_TEXT}.
 */
public class PdfDirWatcher extends DirWatcher {

    private static final Logger logger = Logger.getLogger(PdfDirWatcher.class);

    /** Extension of the watched inputs; only a trailing occurrence is ever rewritten. */
    private static final String PDF_EXTENSION = ".pdf";

    /** Current activity of the watcher: one of WAITING, WORKING or ERROR. */
    int state = 0;

    static final int WAITING = 0;
    static final int WORKING = 1;
    static final int ERROR = 2;

    /** Name of the operation to run; compared against the public constants below. */
    private String type;

    public static final String IMAGIFY_BLOCKS = "ImagifyBlock";
    public static final String IMAGIFY_SECTIONS = "ImagifySection";
    public static final String BLOCKIFY = "Blockify";
    public static final String BLOCKIFY_CLASSIFY = "BlockifyClassify";
    public static final String READ_SECTION_TEXT = "ReadText";

    private Timer timer;
    private File output;
    private File ruleFile;
    private LapdfEngine engine;

    // Not referenced anywhere in this class; kept so the class layout is unchanged.
    private Set<File> files = new HashSet<File>();

    /**
     * Creates a watcher over {@code input} restricted to ".pdf" files.
     *
     * @param type     name of the operation to run for each added file
     * @param input    directory to watch (handed to the superclass)
     * @param output   directory that receives the generated output
     * @param ruleFile rule file passed to the engine by the classifying operations
     * @throws Exception if the superclass or the engine cannot be constructed
     */
    public PdfDirWatcher(String type, File input, File output, File ruleFile)
            throws Exception {
        super(input, ".pdf");
        this.type = type;
        this.output = output;
        this.ruleFile = ruleFile;
        this.engine = new LapdfEngine();
    }

    /**
     * Creates a new {@link Timer} and schedules this watcher on it, starting
     * immediately and repeating with a period of 1000 ms.
     */
    public void setUpLiveFolder() {
        this.timer = new Timer();
        timer.schedule(this, new Date(), 1000);
    }

    /**
     * Cancels the timer created by {@link #setUpLiveFolder()}. Does nothing if
     * no timer has been created yet.
     */
    public void cancelLiveFolder() {
        if (this.timer != null) {
            this.timer.cancel();
        }
    }

    /**
     * Reacts to a change notification for one file.
     *
     * <p>"added" runs {@link #execute(File)}; "deleted" removes the output that
     * was derived from the file; any other action is ignored. The state is
     * WORKING while the notification is handled, WAITING afterwards, and ERROR
     * if an exception escapes the handling.
     *
     * @param file   the file the notification is about
     * @param action the kind of change, e.g. "added" or "deleted"
     */
    protected void onChange(File file, String action) {
        try {
            state = WORKING;
            if (action.equals("added")) {
                logger.info("File " + file.getName() + " action: " + action);
                this.execute(file);
            } else if (action.equals("deleted")) {
                logger.info("File " + file.getName() + " action: " + action);
                deleteOutputFor(file);
            }
            state = WAITING;
        } catch (Exception e) {
            state = ERROR;
            logger.error("Failed to handle change for " + file, e);
        }
    }

    /**
     * Runs the configured operation on one PDF file and writes the result
     * below the output directory, at the location returned by
     * {@code Converters.mimicDirectoryStructure}.
     *
     * <p>Failures inside the engine are logged and not rethrown, so one bad
     * file does not stop the watcher.
     *
     * @param pdf the PDF file to process
     * @throws Exception if the configured type is not one of the known
     *                   operations, or if the output location cannot be derived
     */
    public void execute(File pdf) throws Exception {
        String pdfStem = stripPdfExtension(pdf.getName());
        String outPath = Converters.mimicDirectoryStructure(this.getInput(),
                output, pdf).getPath();

        if (type.equals(PdfDirWatcher.IMAGIFY_BLOCKS)) {
            File outDir = ensureDirectory(
                    new File(swapPdfExtension(outPath, "_blockImgs")));
            try {
                LapdfDocument lapdf = engine.blockifyPdfFile(pdf);
                engine.renderImageOutlines(lapdf, outDir, pdfStem,
                        LapdfMode.BLOCK_ONLY);
            } catch (Exception e) {
                logProcessingFailure(pdf, e);
            }
        } else if (type.equals(PdfDirWatcher.IMAGIFY_SECTIONS)) {
            File outDir = ensureDirectory(
                    new File(swapPdfExtension(outPath, "_secImgs")));
            try {
                LapdfDocument lapdf = engine.blockifyPdfFile(pdf);
                engine.classifyDocument(lapdf, ruleFile);
                engine.renderImageOutlines(lapdf, outDir, pdfStem,
                        LapdfMode.CLASSIFY);
            } catch (Exception e) {
                logProcessingFailure(pdf, e);
            }
        } else if (type.equals(PdfDirWatcher.BLOCKIFY)) {
            File outFile = new File(swapPdfExtension(outPath, "_spatial.xml"));
            try {
                LapdfDocument lapdf = engine.blockifyPdfFile(pdf);
                engine.writeSpatialXmlToFile(lapdf, outFile);
            } catch (Exception e) {
                logProcessingFailure(pdf, e);
            }
        } else if (type.equals(PdfDirWatcher.BLOCKIFY_CLASSIFY)) {
            File outFile = new File(swapPdfExtension(outPath, "_openAccess.xml"));
            try {
                LapdfDocument lapdf = engine.blockifyPdfFile(pdf);
                engine.classifyDocument(lapdf, ruleFile);
                engine.writeSectionsToOpenAccessXmlFile(lapdf, outFile);
            } catch (Exception e) {
                logProcessingFailure(pdf, e);
            }
        } else if (type.equals(PdfDirWatcher.READ_SECTION_TEXT)) {
            // Two groups of block types handed to the engine's text writer:
            // body + heading first, figure legends second.
            List<Set<String>> stack = new ArrayList<Set<String>>();
            Set<String> sections = new HashSet<String>();
            sections.add(Block.TYPE_BODY);
            sections.add(Block.TYPE_HEADING);
            stack.add(sections);
            sections = new HashSet<String>();
            sections.add(Block.TYPE_FIGURE_LEGEND);
            stack.add(sections);

            // The suffix is appended even when the path has no ".pdf" ending.
            File outFile = new File(stripPdfExtension(outPath) + "_fullText.txt");
            try {
                LapdfDocument lapdf = engine.blockifyPdfFile(pdf);
                engine.classifyDocument(lapdf, ruleFile);
                engine.writeTextToFile(lapdf, stack, outFile);
            } catch (Exception e) {
                logProcessingFailure(pdf, e);
            }
        } else {
            throw new Exception(WatchDirectory.USAGE + type
                    + " is not a prescribed exectution command.");
        }
    }

    /**
     * Removes the output derived from a deleted PDF. Unknown operation types
     * delete nothing.
     *
     * <p>NOTE(review): the target is resolved directly under the output
     * directory, whereas {@link #execute(File)} places output via
     * {@code Converters.mimicDirectoryStructure}; for files in nested
     * sub-directories these locations may differ - confirm against DirWatcher.
     */
    private void deleteOutputFor(File pdf) throws Exception {
        String name = pdf.getName();
        if (type.equals(PdfDirWatcher.IMAGIFY_BLOCKS)) {
            Converters.recursivelyDeleteFiles(
                    new File(output, swapPdfExtension(name, "_blockImgs")));
        } else if (type.equals(PdfDirWatcher.IMAGIFY_SECTIONS)) {
            Converters.recursivelyDeleteFiles(
                    new File(output, swapPdfExtension(name, "_secImgs")));
        } else if (type.equals(PdfDirWatcher.BLOCKIFY)) {
            deleteQuietly(new File(output, swapPdfExtension(name, "_spatial.xml")));
        } else if (type.equals(PdfDirWatcher.BLOCKIFY_CLASSIFY)) {
            deleteQuietly(new File(output, swapPdfExtension(name, "_openAccess.xml")));
        } else if (type.equals(PdfDirWatcher.READ_SECTION_TEXT)) {
            // Same name derivation as execute() so the written file is the one removed.
            deleteQuietly(new File(output, stripPdfExtension(name) + "_fullText.txt"));
        }
    }

    /** Deletes a single file, logging a warning if it exists but cannot be removed. */
    private static void deleteQuietly(File target) {
        if (target.exists() && !target.delete()) {
            logger.warn("Could not delete " + target.getPath());
        }
    }

    /** Creates the directory (including missing parents) if absent; warns on failure. */
    private static File ensureDirectory(File dir) {
        if (!dir.exists() && !dir.mkdirs()) {
            logger.warn("Could not create output directory " + dir.getPath());
        }
        return dir;
    }

    /** Returns the text without a trailing ".pdf"; other occurrences are left alone. */
    private static String stripPdfExtension(String nameOrPath) {
        if (nameOrPath.endsWith(PDF_EXTENSION)) {
            return nameOrPath.substring(0,
                    nameOrPath.length() - PDF_EXTENSION.length());
        }
        return nameOrPath;
    }

    /** Replaces a trailing ".pdf" with the suffix; returns the text unchanged if there is none. */
    private static String swapPdfExtension(String nameOrPath, String suffix) {
        if (nameOrPath.endsWith(PDF_EXTENSION)) {
            return stripPdfExtension(nameOrPath) + suffix;
        }
        return nameOrPath;
    }

    /** Records an engine failure for one file without interrupting the watcher. */
    private static void logProcessingFailure(File pdf, Exception e) {
        logger.error("Processing failed for " + pdf.getPath(), e);
    }
}