/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.eval.reports;
import javax.xml.parsers.DocumentBuilder;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.sql.Connection;
import java.sql.DatabaseMetaData;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.poi.common.usermodel.Hyperlink;
import org.apache.tika.eval.ExtractComparer;
import org.apache.tika.eval.ExtractProfiler;
import org.apache.tika.eval.db.H2Util;
import org.apache.tika.eval.db.JDBCUtil;
import org.apache.tika.parser.ParseContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
 * Command line tool that runs a set of SQL-backed reports against a tika-eval
 * database and writes them under a reports directory.
 * <p>
 * The reports are described by an xml file: optional {@code before} and
 * {@code after} elements hold sql statements to execute around the reports, and
 * each {@code report} element describes a single {@link Report}.
 */
public class ResultsReporter {

    private static final Logger LOG = LoggerFactory.getLogger(ResultsReporter.class);

    private static final Options OPTIONS;

    static {
        OPTIONS = new Options();
        OPTIONS.addOption("rd", "reportsDir", true, "directory for the reports. " +
                "If not specified, will write to 'reports'. " +
                "BEWARE: Will overwrite existing reports without warning!")
                .addOption("rf", "reportsFile", true, "xml specifying sql to call for the reports. " +
                        "If not specified, will use default reports in resources/tika-eval-*-config.xml")
                .addOption("db", true, "default database (in memory H2). Specify a file name for the H2 database.")
                .addOption("jdbc", true, "EXPERT: full jdbc connection string. Specify this or use -db <h2db_name>")
                .addOption("jdbcdriver", true, "EXPERT: specify the jdbc driver class if all else fails")
                .addOption("tablePrefix", true, "EXPERT: if not using the default tables, specify your table name prefix");
    }

    /**
     * Prints the command line usage for this tool.
     */
    public static void USAGE() {
        HelpFormatter helpFormatter = new HelpFormatter();
        helpFormatter.printHelp(
                80,
                "java -jar tika-eval-x.y.jar Report -db mydb [-rd myreports] [-rf myreports.xml]",
                "Tool: Report",
                ResultsReporter.OPTIONS,
                "Note: for h2 db, do not include the .mv.db at the end of the db name.");
    }

    // sql statements executed before any report is written
    List<String> before = new ArrayList<>();
    // sql statements executed after all reports have been written
    List<String> after = new ArrayList<>();
    // reports to write, in the order they appear in the config
    List<Report> reports = new ArrayList<>();

    private void addBefore(String b) {
        before.add(b);
    }

    private void addAfter(String a) {
        after.add(a);
    }

    private void addReport(Report r) {
        reports.add(r);
    }

    /**
     * Builds a reporter from an xml reports config file.
     *
     * @param p path to the xml file describing the reports
     * @return a reporter holding the before/after sql and the reports found in the file
     * @throws Exception if the file can't be read or parsed
     * @throws IllegalArgumentException if a report element is malformed
     */
    public static ResultsReporter build(Path p) throws Exception {
        ResultsReporter r = new ResultsReporter();

        DocumentBuilder docBuilder = new ParseContext().getDocumentBuilder();
        Document doc;
        try (InputStream is = Files.newInputStream(p)) {
            doc = docBuilder.parse(is);
        }
        Node docElement = doc.getDocumentElement();
        // only checked when the jvm runs with assertions enabled (-ea)
        assert (docElement.getNodeName().equals("reports"));

        NodeList children = docElement.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            Node n = children.item(i);
            String nodeName = n.getNodeName();
            if ("before".equals(nodeName)) {
                for (String sql : getSql(n)) {
                    r.addBefore(sql);
                }
            } else if ("after".equals(nodeName)) {
                for (String sql : getSql(n)) {
                    r.addAfter(sql);
                }
            } else if ("report".equals(nodeName)) {
                r.addReport(buildReport(n));
            }
            // anything else (whitespace text, comments, unknown elements) is skipped
        }
        return r;
    }

    /**
     * Builds a single report from a {@code report} element.
     *
     * @param n the {@code report} element; must carry the attributes
     *          {@code includeSql}, {@code reportFilename} and {@code reportName}
     * @return the populated report
     * @throws IllegalArgumentException if a required attribute is missing, if there
     *                                  is more than one {@code sql} child, or if an
     *                                  unexpected child element is found
     */
    private static Report buildReport(Node n) {
        Report r = new Report();
        r.includeSql = Boolean.parseBoolean(requiredAttribute(n, "includeSql"));
        r.reportFilename = requiredAttribute(n, "reportFilename");
        r.reportName = requiredAttribute(n, "reportName");

        NodeList children = n.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            Node child = children.item(i);
            if (child.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            String childName = child.getNodeName();
            if ("sql".equals(childName)) {
                if (r.sql != null) {
                    throw new IllegalArgumentException("Can only have one sql statement per report");
                }
                r.sql = child.getTextContent();
            } else if ("colformats".equals(childName)) {
                r.cellFormatters = getCellFormatters(child);
            } else {
                throw new IllegalArgumentException("Not expecting to see:" + childName);
            }
        }
        return r;
    }

    /**
     * Reads the column formatters declared under a {@code colformats} element.
     * Each child element needs a {@code name} (the column) and a {@code type}
     * ({@code numberFormatter}, {@code urlLink} or {@code fileLink}).
     *
     * @param n the {@code colformats} element
     * @return map of column name to its formatter
     * @throws IllegalArgumentException if a required attribute is missing
     */
    private static Map<String, XSLXCellFormatter> getCellFormatters(Node n) {
        NodeList children = n.getChildNodes();
        Map<String, XSLXCellFormatter> ret = new HashMap<>();
        for (int i = 0; i < children.getLength(); i++) {
            Node child = children.item(i);
            if (child.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            String columnName = requiredAttribute(child, "name");
            // only checked when the jvm runs with assertions enabled (-ea);
            // otherwise a later formatter for the same column replaces the earlier one
            assert (!ret.containsKey(columnName));

            String type = requiredAttribute(child, "type");
            if ("numberFormatter".equals(type)) {
                String format = requiredAttribute(child, "format");
                ret.put(columnName, new XLSXNumFormatter(format));
            } else if ("urlLink".equals(type)) {
                String base = optionalAttribute(child, "base", "");
                ret.put(columnName, new XLSXHREFFormatter(base, Hyperlink.LINK_URL));
            } else if ("fileLink".equals(type)) {
                String base = optionalAttribute(child, "base", "");
                ret.put(columnName, new XLSXHREFFormatter(base, Hyperlink.LINK_FILE));
            } else {
                // no formatter is registered for this column; make that visible
                LOG.warn("Ignoring unrecognized column formatter type '{}' for column '{}'",
                        type, columnName);
            }
        }
        return ret;
    }

    /**
     * Collects the text content of every child element of the given node.
     *
     * @param n a {@code before} or {@code after} element
     * @return the sql strings, in document order
     */
    private static List<String> getSql(Node n) {
        List<String> ret = new ArrayList<>();
        NodeList children = n.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            Node child = children.item(i);
            if (child.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            ret.add(child.getTextContent());
        }
        return ret;
    }

    /**
     * Returns the value of an attribute that must be present on the element.
     *
     * @throws IllegalArgumentException if the attribute is absent
     */
    private static String requiredAttribute(Node element, String attrName) {
        NamedNodeMap attrs = element.getAttributes();
        Node attr = (attrs == null) ? null : attrs.getNamedItem(attrName);
        if (attr == null) {
            throw new IllegalArgumentException("Element <" + element.getNodeName() +
                    "> is missing required attribute '" + attrName + "'");
        }
        return attr.getNodeValue();
    }

    /**
     * Returns the value of an attribute, or {@code defaultValue} if it is absent.
     */
    private static String optionalAttribute(Node element, String attrName, String defaultValue) {
        NamedNodeMap attrs = element.getAttributes();
        Node attr = (attrs == null) ? null : attrs.getNamedItem(attrName);
        return (attr == null) ? defaultValue : attr.getNodeValue();
    }

    /**
     * Entry point: parses the command line, connects to the database named by
     * {@code -db} or {@code -jdbc}, loads the reports config ({@code -rf}, or a
     * default picked from the tables in the database) and writes the reports to
     * the directory given by {@code -rd} (default {@code reports}).
     *
     * @param args command line arguments
     * @throws Exception if the database can't be found or a report fails
     */
    public static void main(String[] args) throws Exception {
        DefaultParser defaultCLIParser = new DefaultParser();
        CommandLine commandLine = null;
        try {
            commandLine = defaultCLIParser.parse(OPTIONS, args);
        } catch (ParseException e) {
            System.out.println(e.getMessage());
            USAGE();
            return;
        }

        JDBCUtil dbUtil = null;
        if (commandLine.hasOption("db")) {
            Path db = Paths.get(commandLine.getOptionValue("db"));
            if (!H2Util.databaseExists(db)) {
                throw new RuntimeException("I'm sorry, but I couldn't find this h2 database: "
                        + db + "\nMake sure not to include the .mv.db at the end.");
            }
            dbUtil = new H2Util(db);
        } else if (commandLine.hasOption("jdbc")) {
            String driverClass = null;
            if (commandLine.hasOption("jdbcdriver")) {
                driverClass = commandLine.getOptionValue("jdbcdriver");
            }
            dbUtil = new JDBCUtil(commandLine.getOptionValue("jdbc"), driverClass);
        } else {
            System.err.println("Must specify either -db for the default in-memory h2 database\n" +
                    "or -jdbc for a full jdbc connection string");
            USAGE();
            return;
        }

        try (Connection c = dbUtil.getConnection()) {
            // only set when the default config was copied out of the jar to a temp file
            Path tmpReportsFile = null;
            try {
                ResultsReporter resultsReporter = null;
                String reportsFile = commandLine.getOptionValue("rf");
                if (reportsFile == null) {
                    tmpReportsFile = getDefaultReportsConfig(c);
                    resultsReporter = ResultsReporter.build(tmpReportsFile);
                } else {
                    resultsReporter = ResultsReporter.build(Paths.get(reportsFile));
                }

                Path reportsRootDirectory = Paths.get(commandLine.getOptionValue("rd", "reports"));
                if (Files.isDirectory(reportsRootDirectory)) {
                    LOG.warn("'Reports' directory exists. Will overwrite existing reports.");
                }

                resultsReporter.execute(c, reportsRootDirectory);
            } finally {
                if (tmpReportsFile != null) {
                    // best-effort cleanup: a failure here must not hide an
                    // exception thrown while building or running the reports
                    try {
                        Files.deleteIfExists(tmpReportsFile);
                    } catch (IOException e) {
                        LOG.warn("Couldn't delete temporary reports config: {}", tmpReportsFile, e);
                    }
                }
            }
        }
    }

    /**
     * Picks the bundled reports config that matches the database and copies it
     * to a temporary file. A table named like
     * {@code ExtractComparer.CONTENTS_TABLE_B} selects the comparison reports; a
     * table named like {@code ExtractProfiler.PROFILE_TABLE} selects the profile
     * reports. Table names are compared case-insensitively and the first match wins.
     *
     * @param c open connection to the database
     * @return path to a temporary copy of the config; the caller is responsible
     *         for deleting it
     * @throws IOException      if the bundled config can't be found or copied
     * @throws SQLException     if the table metadata can't be read
     * @throws RuntimeException if neither table is present
     */
    private static Path getDefaultReportsConfig(Connection c) throws IOException, SQLException {
        DatabaseMetaData md = c.getMetaData();
        String internalPath = null;
        try (ResultSet rs = md.getTables(null, null, "%", null)) {
            while (rs.next()) {
                // column 3 of DatabaseMetaData.getTables is TABLE_NAME
                String tName = rs.getString(3);
                if (ExtractComparer.CONTENTS_TABLE_B.getName().equalsIgnoreCase(tName)) {
                    internalPath = "/comparison-reports.xml";
                    break;
                } else if (ExtractProfiler.PROFILE_TABLE.getName().equalsIgnoreCase(tName)) {
                    internalPath = "/profile-reports.xml";
                    break;
                }
            }
        }

        if (internalPath == null) {
            throw new RuntimeException("Couldn't determine if this database was a 'profiler' or 'comparison' db");
        }

        try (InputStream is = ResultsReporter.class.getResourceAsStream(internalPath)) {
            // getResourceAsStream returns null rather than throwing when the resource is absent
            if (is == null) {
                throw new IOException("Couldn't find default reports config on the classpath: " + internalPath);
            }
            Path tmp = Files.createTempFile("tmp-tika-reports", ".xml");
            try {
                Files.copy(is, tmp, StandardCopyOption.REPLACE_EXISTING);
            } catch (IOException | RuntimeException e) {
                // don't leave a partial temp file behind if the copy fails
                try {
                    Files.deleteIfExists(tmp);
                } catch (IOException cleanupFailure) {
                    e.addSuppressed(cleanupFailure);
                }
                throw e;
            }
            return tmp;
        }
    }

    /**
     * Runs the {@code before} sql, writes every report, then runs the
     * {@code after} sql. If any step throws, the remaining steps are not run.
     *
     * @param c                open connection to the database
     * @param reportsDirectory directory passed to each report to write into
     * @throws IOException  if a report can't be written
     * @throws SQLException if any sql statement fails
     */
    public void execute(Connection c, Path reportsDirectory) throws IOException, SQLException {
        // one statement is shared by the before/after sql and closed when done
        try (Statement st = c.createStatement()) {
            for (String sql : before) {
                st.execute(sql);
            }
            for (Report r : reports) {
                r.writeReport(c, reportsDirectory);
            }
            for (String sql : after) {
                st.execute(sql);
            }
        }
    }
}