package de.uni_passau.fim.infosun.prophet.util.qTree.handlers;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.StringWriter;
import java.io.Writer;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.function.Function;
import au.com.bytecode.opencsv.CSVWriter;
import de.uni_passau.fim.infosun.prophet.Constants;
import de.uni_passau.fim.infosun.prophet.util.qTree.QTreeNode;
import nu.xom.Builder;
import nu.xom.Document;
import nu.xom.Element;
import nu.xom.Elements;
import nu.xom.ParentNode;
import nu.xom.ParsingException;
/**
 * Handles CSV operations for the <code>QTree</code>.
 * <p>
 * This is a pure utility class (private constructor, only static methods). It reads answer XML files,
 * flattens the tree of 'QTreeNode' elements found in each of them into one CSV line and writes all lines
 * (preceded by one header line) to a CSV file.
 */
public final class QTreeCSVHandler extends QTreeFormatHandler {

    /** Separator placed between the names of the nodes on a path and before the final column name. */
    private static final String PATH_SEPARATOR = ":";

    /**
     * Utility class.
     */
    private QTreeCSVHandler() {}

    /**
     * Recursively searches the given <code>directory</code> for files named <code>fileName</code> and
     * collects them in a list. If the given <code>File</code> is not a directory an empty list will be returned.
     *
     * @param directory
     *         the directory to be searched
     * @param fileName
     *         the filename to be searched for
     *
     * @return the list of files
     *
     * @throws NullPointerException
     *         if <code>directory</code> or <code>fileName</code> is <code>null</code>
     */
    public static List<File> getFilesByName(File directory, String fileName) {
        Objects.requireNonNull(directory, "directory must not be null!");
        Objects.requireNonNull(fileName, "fileName must not be null!");

        List<File> matches = new ArrayList<>();

        if (!directory.isDirectory()) {
            return matches;
        }

        // listFiles() returns null if the directory can not be read
        File[] files = directory.listFiles();

        if (files == null) {
            return matches;
        }

        for (File file : files) {
            if (file.isDirectory()) {
                matches.addAll(getFilesByName(file, fileName));
            } else if (file.getName().equals(fileName)) {
                matches.add(file);
            }
        }

        return matches;
    }

    /**
     * Exports all files named {@value Constants#FILE_ANSWERS} in CSV format to the specified <code>saveFile</code>.
     * <p>
     * The header line is generated from the first file that could be parsed; every parsed file contributes
     * one content line. Files whose structure differs from that of the first file will therefore produce
     * lines that do not line up with the header. Files that can not be read or parsed are reported on
     * <code>System.err</code> and skipped.
     *
     * @param answerDir
     *         the directory containing the {@value Constants#FILE_ANSWERS} files; subdirectories will be searched
     * @param saveFile
     *         the file to save the resulting CSV to
     *
     * @throws NullPointerException
     *         if <code>answerDir</code> or <code>saveFile</code> is <code>null</code>
     */
    public static void exportCSV(File answerDir, File saveFile) {
        // fail before doing any parsing work instead of when the output stream is opened
        Objects.requireNonNull(saveFile, "saveFile must not be null!");

        List<File> files = getFilesByName(answerDir, Constants.FILE_ANSWERS);
        List<String[]> lines = new ArrayList<>();

        for (File file : files) {
            // a fresh decoder per file; unlike passing the Charset it reports malformed input as an IOException
            CharsetDecoder utf8decoder = StandardCharsets.UTF_8.newDecoder();

            try (Reader reader = new InputStreamReader(new FileInputStream(file), utf8decoder)) {
                Element root = new Builder().build(reader).getRootElement();

                // the first successfully parsed file determines the header
                if (lines.isEmpty()) {
                    lines.add(toLine(root, QTreeCSVHandler::headerFor));
                }

                lines.add(toLine(root, QTreeCSVHandler::contentFor));
            } catch (IOException | ParsingException e) {
                System.err.println("Could not parse " + Constants.FILE_ANSWERS + " file " + file.getAbsolutePath());
                System.err.println(e.getMessage());
            }
        }

        CharsetEncoder utf8encoder = StandardCharsets.UTF_8.newEncoder();

        // The CSVWriter is managed as a resource too, so that it is closed (and thereby flushed) before
        // the underlying writer is closed.
        try (Writer writer = new OutputStreamWriter(new FileOutputStream(saveFile), utf8encoder);
             CSVWriter csvWriter = new CSVWriter(writer, ';', '"')) {

            csvWriter.writeAll(lines);
        } catch (IOException e) {
            System.err.println("Could not write the CSV export file " + saveFile.getAbsolutePath());
            System.err.println(e.getMessage());
        }
    }

    /**
     * Builds one complete CSV line for the tree rooted at <code>root</code>.
     *
     * @param root
     *         the root 'QTreeNode' <code>Element</code> of an answer document
     * @param extractor
     *         the function producing the CSV field values, see {@link #makeLine(List, Element, Function)}
     *
     * @return the fields of the line
     */
    private static String[] toLine(Element root, Function<Element, String> extractor) {
        List<String> line = new ArrayList<>();

        makeLine(line, root, extractor);
        return line.toArray(new String[0]);
    }

    /**
     * Makes one line of the CSV file and stores the generated CSV values in the given <code>List line</code>.
     * Can be configured (using <code>extractor</code>) to generate the header or a content line of the CSV file.
     * <p>
     * For every node the fields are appended in this order: one field per 'entry' child of the node's 'answers'
     * element (if there is one), one field for the node itself, then recursively the fields of all 'QTreeNode'
     * children.
     *
     * @param line
     *         the <code>List</code> to store the line in
     * @param element
     *         the XML <code>Element</code> for which the line is to be created; must be of type QTreeNode
     * @param extractor
     *         a function producing the CSV field value for the element itself and all its answers
     */
    private static void makeLine(List<String> line, Element element, Function<Element, String> extractor) {
        Element answers = element.getFirstChildElement("answers");

        if (answers != null) {
            Elements answerEntries = answers.getChildElements("entry");

            for (int i = 0; i < answerEntries.size(); i++) {
                line.add(extractor.apply(answerEntries.get(i)));
            }
        }

        line.add(extractor.apply(element));

        Elements children = element.getChildElements(QTreeNode.class.getSimpleName());

        for (int i = 0; i < children.size(); i++) {
            makeLine(line, children.get(i), extractor);
        }
    }

    /**
     * Produces the header <code>String</code> for the given <code>element</code>.
     * <p>
     * The header is the path to the (enclosing) QTreeNode followed by {@value #PATH_SEPARATOR} and either
     * 'answerTime' (for a 'QTreeNode') or the value of the 'string' child (for an 'entry'). An 'entry' without
     * a 'string' child gets an empty name after the separator instead of causing an exception.
     *
     * @param element
     *         the <code>Element</code>; must be either 'QTreeNode' or 'entry'
     *
     * @return the header or an empty <code>String</code> if the type of the <code>Element</code> is not supported
     */
    private static String headerFor(Element element) {
        String name = element.getLocalName();
        String column;

        if (name.equals(QTreeNode.class.getSimpleName())) {
            column = "answerTime";
        } else if ("entry".equals(name)) {
            Element key = element.getFirstChildElement("string");
            column = (key == null) ? "" : key.getValue();
        } else {
            System.err.println("Unrecognized element " + name);
            return "";
        }

        // pathTo already contains the separators between the node names
        return String.join("", pathTo(element)) + PATH_SEPARATOR + column;
    }

    /**
     * Produces the content <code>String</code> for the given <code>element</code>.
     * <p>
     * For a 'QTreeNode' this is the value of its 'answerTime' attribute. For an 'entry' it is built from the
     * 'string' children of its 'string-array' child: a single answer is returned as is, any other number of
     * answers is returned as one comma separated, single-quoted list. An 'entry' without a 'string-array' child
     * yields an empty <code>String</code> instead of causing an exception.
     *
     * @param element
     *         the <code>Element</code>; must be either 'QTreeNode' or 'entry'
     *
     * @return the content (<code>null</code> for a 'QTreeNode' that has no 'answerTime' attribute) or an empty
     *         <code>String</code> if the type of the <code>Element</code> is not supported
     */
    private static String contentFor(Element element) {
        String name = element.getLocalName();

        if (name.equals(QTreeNode.class.getSimpleName())) {
            return element.getAttributeValue("answerTime");
        }

        if (!"entry".equals(name)) {
            System.err.println("Unrecognized element " + name);
            return "";
        }

        Element answerArray = element.getFirstChildElement("string-array");

        if (answerArray == null) {
            return "";
        }

        Elements answerElements = answerArray.getChildElements("string");

        if (answerElements.size() == 1) {
            return answerElements.get(0).getValue();
        }

        String[] answers = new String[answerElements.size()];

        for (int i = 0; i < answers.length; i++) {
            answers[i] = answerElements.get(i).getValue();
        }

        // nested mini CSV: ',' as separator, '\'' as quote character and no line end
        StringWriter csv = new StringWriter();
        CSVWriter writer = new CSVWriter(csv, ',', '\'', "");

        writer.writeNext(answers);
        return csv.toString();
    }

    /**
     * Gives the path from the root QTreeNode to the given <code>Element</code> (or the QTreeNode father of the
     * <code>Element</code>) separated by {@value #PATH_SEPARATOR}.
     * <p>
     * The returned list alternates between node names (the 'name' attribute, an empty <code>String</code> if
     * the attribute is missing) and {@value #PATH_SEPARATOR} entries. If the <code>target</code> is neither a
     * QTreeNode nor contained in one, the returned list is empty.
     *
     * @param target
     *         the <code>Element</code> whose path is to be constructed
     *
     * @return the path
     */
    private static List<String> pathTo(Element target) {
        String nodeName = QTreeNode.class.getSimpleName();
        List<String> path = new ArrayList<>();
        ParentNode current = target;

        // climb up to the nearest QTreeNode (which may be the target itself)
        while (current instanceof Element && !nodeName.equals(((Element) current).getLocalName())) {
            current = current.getParent();
        }

        // walk from that node up to the root element, prepending the names on the way; the walk ends at the
        // Document (or at null for a detached element)
        while (current instanceof Element) {
            Element element = (Element) current;

            if (!path.isEmpty()) {
                path.add(0, PATH_SEPARATOR);
            }

            path.add(0, Objects.toString(element.getAttributeValue("name"), ""));
            current = element.getParent();
        }

        return path;
    }
}