package com.formulasearchengine.mathosphere.utils;
import com.formulasearchengine.mathmltools.xmlhelper.XMLHelper;
import org.apache.commons.cli.*;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.xml.sax.SAXException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
/**
 * Created by Moritz on 13.08.2015. <p> Creates a harvest file from a folder.
 *
 * <p>Every regular file in the data source folder whose name ends with {@code .xml} is parsed
 * and its root element is wrapped in an {@code expr} element (namespace
 * {@code http://search.mathweb.org/ns}) carrying a {@code url} attribute derived from the file
 * name. All {@code expr} elements are collected under a single {@code harvest} root element,
 * which is serialized either to standard output or to the file given with {@code -o}.
 *
 * <p>Diagnostics are written to standard error so that they never mix with a harvest document
 * printed to standard output.
 */
@SuppressWarnings("AccessStaticViaInstance")
public class HarvestFromFiles {

    /** Namespace of the generated {@code harvest} and {@code expr} elements. */
    private static final String MWS_NAMESPACE = "http://search.mathweb.org/ns";

    /** File name suffix that selects the input files. */
    private static final String XML_SUFFIX = ".xml";

    /**
     * Command line entry point.
     *
     * @param args command line arguments; {@code -d <folder>} is required unless help is requested
     */
    public static void main(String[] args) {
        Options options = buildOptions();
        // The data source option is required, so the parser would reject a lone "-help".
        // Detect the help request before parsing to keep the option usable.
        if (isHelpRequested(args)) {
            printHelp(options);
            return;
        }
        CommandLineParser parser = new GnuParser();
        try {
            CommandLine line = parser.parse(options, args);
            if (line.hasOption("help")) {
                printHelp(options);
            } else {
                processDocs(line);
            }
        } catch (ParseException exp) {
            System.err.println("Parsing failed. Reason: " + exp.getMessage());
            printHelp(options);
        }
    }

    /** Builds the set of supported command line options. */
    private static Options buildOptions() {
        Options options = new Options();
        Option help = new Option("help", "print this message");
        Option dataSource = OptionBuilder.withArgName("file")
            .hasArg()
            .isRequired()
            .withDescription("use given file for data source")
            .withLongOpt("datasource")
            .create("d");
        Option resultSink = OptionBuilder.withArgName("file")
            .hasArg()
            .withDescription("specify file for the output")
            .withLongOpt("output")
            .create("o");
        options.addOption(dataSource)
            .addOption(resultSink)
            .addOption(help)
            .addOption("i", "ignoreUnderscores", false, "Ignores everything that comes after the last underscore in the filename.");
        return options;
    }

    /** Returns {@code true} if one of the raw arguments asks for the usage message. */
    private static boolean isHelpRequested(String[] args) {
        for (String arg : args) {
            if ("-help".equals(arg) || "--help".equals(arg)) {
                return true;
            }
        }
        return false;
    }

    /** Prints the usage message for the given options. */
    private static void printHelp(Options options) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("java -jar FILENAME.jar", options);
    }

    /**
     * Builds the harvest document from all {@code .xml} files directly inside the data source
     * folder and writes it out. Files that cannot be processed are reported and skipped.
     *
     * @param line parsed command line providing the {@code datasource}, {@code output} and
     *             {@code ignoreUnderscores} options
     */
    private static void processDocs(CommandLine line) {
        File folder = new File(line.getOptionValue("datasource"));
        // listFiles() returns null if the path is not a directory or cannot be read.
        File[] entries = folder.listFiles();
        if (entries == null) {
            System.err.println("Can not read input folder " + folder);
            return;
        }
        Document doc;
        try {
            doc = XMLHelper.getNewDocument(true);
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
            return;
        }
        Element har = doc.createElementNS(MWS_NAMESPACE, "harvest");
        boolean ignoreUnderscores = line.hasOption("ignoreUnderscores");
        for (final File fileEntry : entries) {
            if (fileEntry.isDirectory()) {
                continue;
            }
            String fileName = fileEntry.getName();
            if (!fileName.endsWith(XML_SUFFIX)) {
                continue;
            }
            String fname = toExprName(fileName, ignoreUnderscores);
            try {
                addFile(doc, har, fileEntry, fname);
            } catch (ParserConfigurationException | IOException | SAXException e) {
                System.err.println("Can not process input " + fname);
                e.printStackTrace();
            }
        }
        doc.appendChild(har);
        try {
            writeOutput(line, doc);
        } catch (TransformerException | IOException e) {
            System.err.println("Can not write output.");
            e.printStackTrace();
        }
    }

    /**
     * Derives the value of the {@code url} attribute from an input file name.
     *
     * @param fileName          file name ending with {@code .xml}
     * @param ignoreUnderscores if {@code true}, the last underscore and everything after it is
     *                          dropped, as described in the option's help text
     * @return the file name without the {@code .xml} suffix and, optionally, without the part
     *         following the last underscore
     */
    private static String toExprName(String fileName, boolean ignoreUnderscores) {
        // Strip the trailing suffix only; a ".xml" earlier in the name is left untouched.
        String name = fileName.substring(0, fileName.length() - XML_SUFFIX.length());
        if (ignoreUnderscores) {
            int lastUnderscore = name.lastIndexOf('_');
            if (lastUnderscore >= 0) {
                name = name.substring(0, lastUnderscore);
            }
        }
        return name;
    }

    /**
     * Parses one input file and appends its root element, wrapped in an {@code expr} element,
     * to the harvest element.
     *
     * @param doc       document that owns the harvest element
     * @param har       harvest element receiving the new {@code expr} child
     * @param fileEntry XML file to import
     * @param fname     value of the {@code url} attribute of the new {@code expr} element
     * @throws ParserConfigurationException if no document builder can be obtained
     * @throws IOException                  if the file cannot be read
     * @throws SAXException                 if the file is not well-formed XML
     */
    private static void addFile(Document doc, Element har, File fileEntry, String fname) throws ParserConfigurationException, IOException, SAXException {
        DocumentBuilder builder = XMLHelper.getDocumentBuilder(true);
        Document xContent = builder.parse(fileEntry.getCanonicalFile());
        // Nodes belong to the document that created them, so the parsed root must be imported.
        Node copiedXDocument = doc.importNode(xContent.getDocumentElement(), true);
        Element mwsExpr = doc.createElementNS(MWS_NAMESPACE, "expr");
        mwsExpr.setAttribute("url", fname);
        mwsExpr.appendChild(copiedXDocument);
        har.appendChild(mwsExpr);
    }

    /**
     * Serializes the document as indented XML and writes it to the file named by the
     * {@code output} option, or to standard output if that option is absent.
     *
     * @param line parsed command line
     * @param doc  document to serialize
     * @throws TransformerException if serialization fails
     * @throws IOException          if the output file cannot be written
     */
    private static void writeOutput(CommandLine line, Document doc) throws TransformerException, IOException {
        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer transformer = tf.newTransformer();
        transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no");
        transformer.setOutputProperty(OutputKeys.METHOD, "xml");
        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
        transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
        StringWriter sw = new StringWriter();
        transformer.transform(new DOMSource(doc), new StreamResult(sw));
        String xml = sw.toString();
        if (!line.hasOption("output")) {
            System.out.println(xml);
        } else {
            File f = new File(line.getOptionValue("output"));
            // The XML declaration announces UTF-8, so the bytes must be UTF-8 regardless of the
            // platform default charset. try-with-resources guarantees the stream is closed.
            try (FileOutputStream fos = new FileOutputStream(f)) {
                fos.write(xml.getBytes(StandardCharsets.UTF_8));
            }
        }
    }
}