package at.ac.univie.mminf.qskos4j.cmd;
import at.ac.univie.mminf.qskos4j.QSkos;
import at.ac.univie.mminf.qskos4j.issues.Issue;
import at.ac.univie.mminf.qskos4j.progress.ConsoleProgressMonitor;
import at.ac.univie.mminf.qskos4j.progress.StreamProgressMonitor;
import at.ac.univie.mminf.qskos4j.util.vocab.InvalidRdfException;
import at.ac.univie.mminf.qskos4j.util.vocab.RepositoryBuilder;
import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import com.beust.jcommander.ParameterException;
import com.beust.jcommander.Parameters;
import org.openrdf.OpenRDFException;
import org.openrdf.repository.Repository;
import org.openrdf.rio.RDFFormat;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
public class VocEvaluate {
public final static String CMD_NAME_ANALYZE = "analyze";
public final static String CMD_NAME_SUMMARIZE = "summarize";
private static JCommander jc;
private CommandSummarize parsedCommand;
private QSkos qskos;
private ReportCollector reportCollector;
@Parameter(names = {"-v", "--version"}, description = "Outputs version of the tool")
private boolean outputVersion = false;
@Parameters(commandNames = CMD_NAME_SUMMARIZE, commandDescription = "Computes basic statistics of a given vocabulary")
private class CommandSummarize {
@SuppressWarnings("unused")
@Parameter(description = "vocabularyfile")
private List<String> vocabFilenames;
@SuppressWarnings("unused")
@Parameter(names = {"-a", "--auth-resource-identifier"}, description = "Authoritative resource identifier")
private String authoritativeResourceIdentifier;
@SuppressWarnings("unused")
@Parameter(names = {"-c", "--check"}, description = "Comma-separated list of issue/statistics IDs to check for")
private String selectedIds;
@SuppressWarnings("unused")
@Parameter(names = {"-dc", "--dont-check"}, description = "Comma-separated list of issue/statistics IDs NOT to check for")
private String excludedIds;
@Parameter(names = {"-xl", "--skosxl"}, description = "Enable SKOSXL support")
private boolean enableSkosXl = false;
@Parameter(names = {"-np", "--no-progress"}, description = "Suppresses output of a progress indicator")
private boolean noProgressBar = false;
@SuppressWarnings("unused")
@Parameter(names = {"-d", "--debug"}, description = "Enable additional informative/debug output")
private boolean debug;
@SuppressWarnings("unused")
@Parameter(names = {"-o", "--output"}, description = "Name of the file that holds the generated report")
private String reportFileName;
@SuppressWarnings("unused")
@Parameter(names = {"-sf", "--stream-friendly"}, description = "Print the progress indicator in a stream-friendly format")
private boolean streamFriendly;
}
@Parameters(commandNames = CMD_NAME_ANALYZE, commandDescription = "Analyzes quality issues of a given vocabulary")
private class CommandAnalyze extends CommandSummarize {
@SuppressWarnings("unused")
@Parameter(names = {"-sp", "--use-subset-percentage"}, description = "Use a specified percentage of the vocabulary triples for evaluation")
private Float randomSubsetSize_percent;
@Parameter(names = {"-wg", "--write-graphs"}, description = "Writes graphs as .dot files to current directory")
private boolean writeGraphs = false;
}
public static void main(String[] args) {
try {
new VocEvaluate(args);
}
catch (ParameterException paramExc) {
jc.usage();
System.err.println("!! " +paramExc.getMessage());
}
catch (IOException ioException) {
System.err.println("!! Error reading file: " +ioException.getMessage());
}
catch (OpenRDFException rdfException) {
System.err.println("!! Error processing vocabulary: " +rdfException.getMessage());
}
}
public VocEvaluate(String[] args) throws OpenRDFException, IOException
{
qskos = new QSkos();
parseCmdParams(args);
if (outputVersion) {
System.out.println("Version: " +getClass().getPackage().getImplementationVersion());
}
if (parsedCommand == null) {
jc.usage();
return;
}
try {
listIssuesOrEvaluate();
}
catch (InvalidRdfException e) {
System.err.println("!! Provided input file does not contain valid RDF data");
System.exit(1);
}
catch (Exception e) {
e.printStackTrace(System.err);
System.exit(1);
}
}
private void parseCmdParams(String[] args) {
jc = new JCommander(this);
CommandAnalyze commandAnalyze = new CommandAnalyze();
CommandSummarize commandSummarize = new CommandSummarize();
jc.addCommand(commandAnalyze);
jc.addCommand(commandSummarize);
jc.parse(args);
String command = jc.getParsedCommand();
if (command != null) {
if (command.equals(CMD_NAME_ANALYZE)) {
parsedCommand = commandAnalyze;
}
if (command.equals(CMD_NAME_SUMMARIZE)) {
parsedCommand = commandSummarize;
}
}
}
private void listIssuesOrEvaluate() throws OpenRDFException, IOException
{
if (parsedCommand.vocabFilenames == null) {
if (parsedCommand instanceof CommandAnalyze) {
outputIssueDetails(Issue.IssueType.ANALYTICAL);
}
else {
outputIssueDetails(Issue.IssueType.STATISTICAL);
}
}
else {
checkVocabFilenameGiven();
evaluate();
}
}
private void outputIssueDetails(Issue.IssueType constraintType) {
for (Issue issue : qskos.getAllIssues()) {
if (issue.getType() == constraintType) {
System.out.println("---");
System.out.println("ID: " +issue.getId());
System.out.println("Name: " +issue.getName());
System.out.println("Description: " +issue.getDescription());
if (issue.getWeblink() != null) {
System.out.println("Further Informaton: <" +issue.getWeblink().stringValue()+ ">");
}
}
}
}
private void checkVocabFilenameGiven() throws ParameterException
{
if (parsedCommand.vocabFilenames == null) {
throw new ParameterException("Please provide a vocabulary file");
}
if (parsedCommand.reportFileName == null) {
throw new ParameterException("Please provide a report output file");
}
}
private void evaluate() throws OpenRDFException, IOException
{
setup();
String command = jc.getParsedCommand();
reportCollector = new ReportCollector(extractMeasures(),
parsedCommand.reportFileName,
parsedCommand.vocabFilenames,
command.equals(CMD_NAME_ANALYZE));
reportCollector.outputIssuesReport(shouldWriteGraphs());
}
private void setup() throws OpenRDFException, IOException {
setupLogging();
RepositoryBuilder repositoryBuilder = new RepositoryBuilder();
File inputFile = new File(parsedCommand.vocabFilenames.get(0));
Repository repo = repositoryBuilder.setUpFromFile(inputFile, null, useRdfXmlFormatIfExtensionIsXml(inputFile));
qskos.setRepositoryConnection(repo.getConnection());
qskos.setAuthResourceIdentifier(parsedCommand.authoritativeResourceIdentifier);
qskos.addSparqlEndPoint("http://sparql.sindice.com/sparql");
qskos.addSparqlEndPoint("http://semantic.ckan.net/sparql");
if (parsedCommand instanceof CommandAnalyze) {
qskos.setSubsetSize(((CommandAnalyze) parsedCommand).randomSubsetSize_percent);
}
if (parsedCommand.enableSkosXl) {
repositoryBuilder.enableSkosXlSupport();
}
if (!parsedCommand.noProgressBar) {
if (parsedCommand.streamFriendly) {
qskos.setProgressMonitor(new StreamProgressMonitor());
}
else {
qskos.setProgressMonitor(new ConsoleProgressMonitor());
}
}
}
private RDFFormat useRdfXmlFormatIfExtensionIsXml(File inputFile) {
if (inputFile.getName().toLowerCase().endsWith(".xml")) {
return RDFFormat.RDFXML;
}
return null;
}
private void setupLogging() {
if (parsedCommand.debug) {
System.setProperty("root-level", "DEBUG");
}
}
private boolean shouldWriteGraphs() {
return parsedCommand instanceof CommandAnalyze && ((CommandAnalyze) parsedCommand).writeGraphs;
}
private Collection<Issue> extractMeasures()
{
Collection<Issue> resultingIssues;
Collection<Issue> selectedIssues = qskos.getIssues(parsedCommand.selectedIds);
Collection<Issue> excludedIssues = qskos.getIssues(parsedCommand.excludedIds);
if (!selectedIssues.isEmpty()) {
resultingIssues = selectedIssues;
}
else if (!excludedIssues.isEmpty()) {
resultingIssues = getAllIssuesForCommand();
resultingIssues.removeAll(excludedIssues);
}
else {
resultingIssues = getAllIssuesForCommand();
}
return resultingIssues;
}
private Collection<Issue> getAllIssuesForCommand() {
List<Issue> issuesForCommand = new ArrayList<>();
for (Issue issue : qskos.getAllIssues()) {
String command = jc.getParsedCommand();
if ((issue.getType() == Issue.IssueType.ANALYTICAL && command.equals(CMD_NAME_ANALYZE)) ||
(issue.getType() == Issue.IssueType.STATISTICAL && command.equals(CMD_NAME_SUMMARIZE)))
{
issuesForCommand.add(issue);
}
}
return issuesForCommand;
}
}