package eu.fbk.knowledgestore.populator.rdf;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import javax.annotation.Nullable;
import com.google.common.base.Charsets;
import com.google.common.base.Function;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.io.ByteStreams;
import com.google.common.io.CharStreams;
import com.google.common.io.Files;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.openrdf.model.Statement;
import org.openrdf.model.URI;
import org.openrdf.rio.RDFFormat;
import org.openrdf.rio.RDFHandler;
import org.openrdf.rio.RDFHandlerException;
import org.openrdf.rio.helpers.RDFHandlerBase;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import ch.qos.logback.classic.LoggerContext;
import ch.qos.logback.classic.joran.JoranConfigurator;
import ch.qos.logback.core.joran.spi.JoranException;
import eu.fbk.knowledgestore.Operation;
import eu.fbk.knowledgestore.Outcome;
import eu.fbk.knowledgestore.Session;
import eu.fbk.knowledgestore.client.Client;
import eu.fbk.knowledgestore.data.Criteria;
import eu.fbk.knowledgestore.data.Data;
import eu.fbk.knowledgestore.data.Handler;
import eu.fbk.knowledgestore.data.Record;
import eu.fbk.knowledgestore.data.Stream;
import eu.fbk.knowledgestore.internal.Compression;
import eu.fbk.knowledgestore.internal.Util;
import eu.fbk.knowledgestore.internal.rdf.RDFUtil;
import eu.fbk.knowledgestore.vocabulary.CKR;
import eu.fbk.knowledgestore.vocabulary.KS;
public final class RDFPopulator {
/**
 * Version string of the tool, obtained from {@code Util.getVersion}. The literal "devel" is
 * passed as last argument (presumably the value used when no version can be resolved --
 * confirm against {@code Util}).
 */
private static final String VERSION = Util.getVersion("eu.fbk.knowledgestore",
"ks-populator-rdf", "devel");
/** Trimmed content of the "header" resource; printed right after the usage line of -h. */
private static final String HEADER = Util.getResource(RDFPopulator.class, "header").trim();
/** Trimmed content of the "footer" resource; printed as the last part of the -h output. */
private static final String FOOTER = Util.getResource(RDFPopulator.class, "footer").trim();
/** Trimmed content of the "disclaimer" resource; printed by both -h and -v. */
private static final String DISCLAIMER = Util.getResource(RDFPopulator.class, "disclaimer")
.trim();
/** Logger used for per-file parsing summaries and for upload failures. */
private static final Logger MAIN_LOGGER = LoggerFactory.getLogger(RDFPopulator.class);
/** Logger named "status", used for the periodic parsing progress messages. */
private static final Logger STATUS_LOGGER = LoggerFactory.getLogger("status");
/**
 * Command line entry point of the tool.
 *
 * Reads the options, selects and checks the input files, sets up the decoding of axioms and
 * then sends them to one of three destinations depending on option -o: STDOUT (no -o), a
 * local file (-o not starting with http:// or https://) or a remote server (otherwise), in
 * which case the records returned by the upload step are written to the file given by -e
 * or to STDERR.
 *
 * The method always terminates the JVM: exit code 0 on success, -1 on invalid input or
 * command line syntax errors, -2 on any other failure.
 *
 * @param args the command line arguments
 */
public static void main(final String... args) {
    try {
        // Options -h and -v are served (and the JVM terminated) by the parsing step
        final CommandLine cmd = parseCommandLine(args);

        // Input related options
        final String base = cmd.getOptionValue('b');
        final int parallelism;
        if (cmd.hasOption('p')) {
            parallelism = Integer.parseInt(cmd.getOptionValue('p'));
        } else {
            parallelism = 1;
        }
        final boolean listStdin = cmd.hasOption('@');
        final String listFile = cmd.getOptionValue('T');
        final List<String> sourceFiles = cmd.getArgList();
        // RDF is read from STDIN only when no other source of input has been given
        final boolean sourceStdin = !(cmd.hasOption('@') || cmd.hasOption('T')
                || cmd.getArgs().length != 0);
        final String sourceFormat = cmd.getOptionValue('s');

        // Output related options
        final String errorFile = cmd.getOptionValue('e');
        final String target = cmd.getOptionValue('o');
        final String targetFormat = cmd.getOptionValue('t');
        final boolean validate = !cmd.hasOption('i');
        final Criteria criteria;
        if (cmd.hasOption('c')) {
            criteria = Criteria.parse(cmd.getOptionValue('c'), Data.getNamespaceMap());
        } else {
            criteria = Criteria.overwrite();
        }
        final URI globalURI;
        if (cmd.hasOption('g')) {
            globalURI = (URI) Data.parseValue(cmd.getOptionValue('g'),
                    Data.getNamespaceMap());
        } else {
            globalURI = CKR.GLOBAL;
        }

        // Credentials have the form user[:pwd]; the password is null if there is no ':'
        final String credentials = cmd.getOptionValue('u');
        String username = null;
        String password = null;
        if (credentials != null) {
            final int colon = credentials.indexOf(':');
            if (colon < 0) {
                username = credentials;
            } else {
                username = credentials.substring(0, colon);
                password = credentials.substring(colon + 1);
            }
        }

        // Collect the inputs and fail early if any of them cannot be parsed
        final List<File> sources = select(listStdin, listFile, sourceFiles, sourceStdin);
        for (final File source : sources) {
            checkFileParseable(source, sourceFormat);
        }

        // Nothing is read until the returned stream is consumed by one of the sinks below
        final Stream<Record> axioms = decode(sources, globalURI, parallelism, base,
                sourceFormat);

        if (target == null) {
            // No -o: axioms go to STDOUT. Logging is disabled and System.out is replaced
            // by a null stream, while the real STDOUT is kept aside for the RDF output
            disableLogging();
            final OutputStream stdout = System.out;
            System.setOut(new PrintStream(ByteStreams.nullOutputStream()));
            write(axioms, stdout, targetFormat);

        } else if (target.startsWith("http://") || target.startsWith("https://")) {
            // -o URL: axioms are uploaded, what comes back is written to -e or STDERR
            Session session = null;
            final Client client = Client.builder(target).maxConnections(2)
                    .validateServer(validate).build();
            try {
                session = client.newSession(username, password);
                final Stream<Record> rejected = upload(session, criteria, axioms);
                if (errorFile != null) {
                    write(rejected, new File(errorFile), targetFormat);
                } else {
                    write(rejected, System.err, targetFormat);
                }
            } finally {
                Util.closeQuietly(session);
                client.close();
            }

        } else {
            // -o FILE: axioms are written to a local file
            write(axioms, new File(target), targetFormat);
        }

        // Everything went fine
        System.exit(0);

    } catch (final IllegalArgumentException ex) {
        // Wrong user input
        ex.printStackTrace();
        System.err.println("INVALID INPUT. " + ex.getMessage());
        System.exit(-1);

    } catch (final ParseException ex) {
        // Malformed command line
        System.err.println("SYNTAX ERROR. " + ex.getMessage());
        System.exit(-1);

    } catch (final Throwable ex) {
        // Any other failure
        System.err.println("EXECUTION FAILED. " + ex.getMessage() + "\n");
        ex.printStackTrace();
        System.exit(-2);
    }
}
/**
 * Declares the supported options, parses the command line and serves the -h and -v
 * commands, both of which print to STDOUT and terminate the JVM with exit code 0 (-h wins
 * if both are given).
 *
 * @param args the command line arguments
 * @return the parsed command line, if neither -h nor -v was specified
 * @throws ParseException if the arguments do not match the declared options
 */
private static CommandLine parseCommandLine(final String... args) throws ParseException {

    // Options selecting the input and controlling its parsing
    final List<Option> inputGroup = Lists.newArrayList();
    newOption(inputGroup, '@', "files-from-stdin", 0, false, null,
            "read names of input files from STDIN");
    newOption(inputGroup, 'T', "files-from", 1, false, "FILE",
            "read names of input files from FILE");
    newOption(inputGroup, 's', "source-format", 1, false, "FMT",
            "use input RDF format/compression FMT (eg: ttl.gz; default: "
                    + "autodetect based on file name)");
    newOption(inputGroup, 'b', "base", 1, false, "URI",
            "base URI for resolving parsed relative URIs");
    newOption(inputGroup, 'p', "parallel-files", 1, false, "N",
            "parse at most N files in parallel (default: 1)");

    // Options controlling the extraction of axioms
    final List<Option> extractGroup = Lists.newArrayList();
    newOption(extractGroup, 'g', "global-uri", 1, false, "URI",
            "use URI in place of ckr:global (default: ckr:global)");
    newOption(extractGroup, 'd', "default", 1, false, "FILE",
            "augment axioms with default metadata/context in FILE");

    // Options selecting the destination and controlling the upload
    // (not declared here: -U|--proxy-user "user[:password]" and -x|--proxy host:port)
    final List<Option> outputGroup = Lists.newArrayList();
    newOption(outputGroup, 'o', "output", 1, false, "FILE|URL",
            "send axioms to FILE | server URL (default: STDOUT)");
    newOption(outputGroup, 'e', "error", 1, false, "FILE",
            "write non-uploaded axioms to FILE (default: STDERR)");
    newOption(outputGroup, 't', "target-format", 1, false, "FMT",
            "use output file RDF format/compression FMT (e.g., ttl.gz; "
                    + "default: autodetect based on file name)");
    newOption(outputGroup, 'u', "user", 1, false, "user[:pwd]",
            "upload using login user:pwd (default: anonymous)");
    newOption(outputGroup, 'i', "ignore-certificate", 0, false, null,
            "don't check server certificate (default: check)");
    newOption(outputGroup, 'c', "criteria", 1, false, "C",
            "upload with merge criteria C (default: overwrite *)");

    // Remaining options
    final List<Option> miscGroup = Lists.newArrayList();
    newOption(miscGroup, 'h', "help", 0, false, null, "print this help message and exit");
    newOption(miscGroup, 'v', "version", 0, false, null,
            "print version information and exit");

    // All the options, in declaration order
    final List<Option> declared = ImmutableList.copyOf(Iterables.concat(inputGroup,
            extractGroup, outputGroup, miscGroup));

    final CommandLine cmd = new GnuParser().parse(newOptions(declared), args);

    if (cmd.hasOption('h')) {
        final int width = 80;
        final HelpFormatter help = new HelpFormatter();
        // List the options in declaration order rather than in the formatter's own order
        help.setOptionComparator(new Comparator<Option>() {

            @Override
            public int compare(final Option first, final Option second) {
                final int firstIndex = declared.indexOf(first);
                final int secondIndex = declared.indexOf(second);
                return firstIndex - secondIndex;
            }

        });
        final PrintWriter writer = new PrintWriter(System.out);
        help.printUsage(writer, width, "ksrdf [-o URL|FILE] [OPTIONS] [INPUT_FILE ...]");
        writer.println();
        help.printWrapped(writer, width, HEADER);
        help.printWrapped(writer, width, DISCLAIMER);
        writer.println("\nInput options:");
        help.printOptions(writer, width, newOptions(inputGroup), 2, 2);
        writer.println("\nExtraction options:");
        help.printOptions(writer, width, newOptions(extractGroup), 2, 5);
        writer.println("\nOutput options:");
        help.printOptions(writer, width, newOptions(outputGroup), 2, 2);
        writer.println("\nMiscellaneous options:");
        help.printOptions(writer, width, newOptions(miscGroup), 2, 14);
        writer.println();
        writer.println(FOOTER);
        writer.flush();
        System.exit(0);

    } else if (cmd.hasOption('v')) {
        final String banner = String.format(
                "ksrdf (FBK KnowledgeStore) %s\njava %s bit (%s) %s\n%s", VERSION,
                System.getProperty("sun.arch.data.model"),
                System.getProperty("java.vendor"), System.getProperty("java.version"),
                DISCLAIMER);
        System.out.println(banner);
        System.exit(0);
    }

    return cmd;
}
/**
 * Builds the list of inputs to process by combining, in this order, the file names given
 * as command line arguments, the names listed in the file of option -T and the names read
 * from STDIN (option -@). Names read from -T and STDIN are taken one per line; surrounding
 * whitespace is stripped and blank lines are skipped.
 *
 * @param listStdin true if file names must be read from STDIN
 * @param listFile the path of a file listing the input file names, null if not specified
 * @param sourceFiles the file names supplied as command line arguments
 * @param sourceStdin true if RDF data must be read from STDIN
 * @return the selected inputs; a trailing null element is added when sourceStdin is true
 * @throws IOException on failure reading the list of names from the file or from STDIN
 * @throws IllegalArgumentException if listFile does not exist or denotes a directory
 */
private static List<File> select(final boolean listStdin, final String listFile,
        final List<String> sourceFiles, final boolean sourceStdin) throws IOException {

    // Start from the file names given as non-option command line arguments
    final List<String> inputs = Lists.newArrayList(sourceFiles);

    // Add the file names listed in the file pointed by option -T, if any
    if (listFile != null) {
        final File file = new File(listFile);
        checkFileExist(file);
        for (final String line : Files.readLines(file, Charsets.UTF_8)) {
            final String trimmedLine = line.trim();
            if (!trimmedLine.isEmpty()) {
                // Keep the trimmed name: stray whitespace would be taken as part of the
                // file name and the file would then be reported as missing
                inputs.add(trimmedLine);
            }
        }
    }

    // Add the file names read from STDIN, if option -@ has been specified
    if (listStdin) {
        for (final String line : CharStreams.readLines(new InputStreamReader(System.in))) {
            final String trimmedLine = line.trim();
            if (!trimmedLine.isEmpty()) {
                inputs.add(trimmedLine);
            }
        }
    }

    // Convert the names to File objects
    final List<File> files = Lists.newArrayListWithCapacity(inputs.size());
    for (final String input : inputs) {
        files.add(new File(input));
    }

    // A null element stands for STDIN
    if (sourceStdin) {
        files.add(null);
    }
    return files;
}
/**
 * Returns a stream that, when consumed, parses the supplied RDF inputs and feeds their
 * statements to a {@code Decoder} bound to the consumer of the stream. Nothing is read
 * when this method is called.
 *
 * @param files the inputs to parse; a null element is reported as "STDIN" in log messages
 * @param globalURI the URI handed to the decoder
 * @param parallelism the parallelism value forwarded to {@code RDFUtil.readRDF}
 * @param base the base URI forwarded to {@code RDFUtil.readRDF}, possibly null
 * @param formatString the format/compression string (e.g. ttl.gz) specified by the user,
 *            null if not specified
 * @return the stream of decoded records
 */
private static Stream<Record> decode(final List<File> files, final URI globalURI,
        final int parallelism, @Nullable final String base,
        @Nullable final String formatString) {

    // Both are null when the user did not specify a format string
    final RDFFormat format = detectRDFFormat(formatString, null);
    final Compression compression = detectCompression(formatString, null);

    return new Stream<Record>() {

        @Override
        protected void doToHandler(final Handler<? super Record> handler) throws Throwable {

            final Decoder decoder = new Decoder(handler, globalURI);

            // Adapter pushing parsed statements into the decoder; a null statement is
            // pushed when the RDF ends
            final RDFHandler sink = new RDFHandlerBase() {

                @Override
                public void handleStatement(final Statement stmt) throws RDFHandlerException {
                    forward(stmt);
                }

                @Override
                public void endRDF() throws RDFHandlerException {
                    forward(null);
                }

                private void forward(@Nullable final Statement stmt)
                        throws RDFHandlerException {
                    try {
                        decoder.handle(stmt);
                    } catch (final Throwable ex) {
                        // Rethrow as is when allowed, wrapped otherwise
                        Throwables.propagateIfPossible(ex, RDFHandlerException.class);
                        throw Throwables.propagate(ex);
                    }
                }

            };

            // Overall progress reporting, then the decoupling handler shared by all files
            final RDFHandler progress = RDFUtil.newLoggingHandler(sink, STATUS_LOGGER, null,
                    "parsing: %d triples (%d triples/s, %d triples/s avg)", null);
            final RDFHandler shared = RDFUtil.newDecouplingHandler(progress, null);

            // Each input gets its own handler, logging a summary message for that input
            final Map<File, RDFHandler> handlers = Maps.newLinkedHashMap();
            for (final File file : files) {
                final String label = file == null ? "STDIN" : file.getAbsolutePath();
                handlers.put(file, RDFUtil.newLoggingHandler(shared, MAIN_LOGGER, null, null,
                        "parsed " + label + ": %d triples, (%d triples/s avg)"));
            }

            // The shared handler is started and ended here, around the parsing of all inputs
            shared.startRDF();
            RDFUtil.readRDF(handlers, format, null, base, false, compression, parallelism);
            shared.endRDF();
            STATUS_LOGGER.info("");
        }

    };
}
/**
 * Returns a stream obtained from the supplied one by placing an {@code UploadHandler} in
 * front of the handler consuming the result, so that consumers receive what the upload
 * handler forwards to them.
 *
 * @param session the session used by the upload handler
 * @param criteria the merge criteria used by the upload handler
 * @param axioms the axioms to upload
 * @return the resulting stream
 */
private static Stream<Record> upload(final Session session, final Criteria criteria,
        final Stream<Record> axioms) {

    final Function<Handler<Record>, Handler<Record>> uploader;
    uploader = new Function<Handler<Record>, Handler<Record>>() {

        @Override
        public Handler<Record> apply(final Handler<Record> downstream) {
            return new UploadHandler(session, criteria, downstream);
        }

    };
    return axioms.transform(null, uploader);
}
/**
 * Writes the supplied records as RDF to an output stream, using the RDF format and the
 * compression denoted by the format string (no compression if it denotes none).
 *
 * NOTE(review): differently from the variant writing to a file, the stream returned by
 * {@code compression.write} is neither flushed nor closed here -- confirm that
 * {@code RDFUtil.writeRDF} takes care of completing the (possibly compressed) output.
 *
 * @param axioms the records to write
 * @param stream the stream to write to
 * @param formatString the format/compression string (e.g. ttl.gz), possibly null
 * @throws IOException on I/O failure
 * @throws IllegalArgumentException if no RDF format can be derived from the format string
 */
private static void write(final Stream<Record> axioms, final OutputStream stream,
        @Nullable final String formatString) throws IOException {

    final Compression compression = detectCompression(formatString, Compression.NONE);
    final RDFFormat format = detectRDFFormat(formatString, null);

    // A stream has no name to look at, so the format must come from the format string
    if (format == null) {
        final String message = formatString != null //
                ? "Cannot detect RDF format for " + formatString
                : "Must specify output format (-t) if writing to STDOUT";
        throw new IllegalArgumentException(message);
    }

    // Layer compression on top of the supplied stream, then emit the RDF
    final OutputStream sink = compression.write(Data.getExecutor(), stream);
    RDFUtil.writeRDF(sink, format, Data.getNamespaceMap(), null,
            Record.encode(axioms, ImmutableSet.of(KS.AXIOM)));
}
/**
 * Writes the supplied records as RDF to a file. RDF format and compression are derived
 * from the file name first and from the format string as a second choice; no compression
 * is applied if neither denotes one.
 *
 * @param axioms the records to write
 * @param file the file to write to
 * @param formatString the format/compression string (e.g. ttl.gz), possibly null
 * @throws IOException on I/O failure
 * @throws IllegalArgumentException if the RDF format cannot be derived from either the
 *             file name or the format string
 */
private static void write(final Stream<Record> axioms, final File file,
        @Nullable final String formatString) throws IOException {

    final String name = file.getName();

    // The file name takes precedence over the format string
    final Compression nameCompression = detectCompression(name, null);
    final Compression compression = nameCompression != null ? nameCompression
            : detectCompression(formatString, Compression.NONE);

    final RDFFormat nameFormat = detectRDFFormat(name, null);
    final RDFFormat format = nameFormat != null ? nameFormat : detectRDFFormat(
            formatString, null);
    if (format == null) {
        throw new IllegalArgumentException("Cannot detect RDF format of " + file);
    }

    // Open the (possibly compressing) stream and always close it once done
    final OutputStream sink = compression.write(Data.getExecutor(), file);
    try {
        RDFUtil.writeRDF(sink, format, Data.getNamespaceMap(), null,
                Record.encode(axioms, ImmutableSet.of(KS.AXIOM)));
    } finally {
        Util.closeQuietly(sink);
    }
}
/**
 * Collects the supplied options in a new {@code Options} object, adding them in iteration
 * order.
 *
 * @param options the options to collect
 * @return the created {@code Options} object
 */
private static Options newOptions(final Iterable<? extends Option> options) {
    final Options collected = new Options();
    for (final Option option : options) {
        collected.addOption(option);
    }
    return collected;
}
/**
 * Creates an option through {@code OptionBuilder} and appends it to the supplied
 * collection.
 *
 * @param options the collection the new option is added to
 * @param shortName the single-character name of the option, null if it has none
 * @param longName the long name of the option
 * @param argCount the number of arguments: 0 for none, 1 for a single argument, more than
 *            1 for that many arguments; a negative value selects the no-arg
 *            {@code hasArgs()} / {@code hasOptionalArgs()} builder methods
 * @param argOpt true if the arguments are optional
 * @param argName the argument name shown in the help; used only if argCount is not 0
 * @param description the description of the option
 */
private static void newOption(final Collection<? super Option> options,
        @Nullable final Character shortName, final String longName, final int argCount,
        final boolean argOpt, @Nullable final String argName, final String description) {

    OptionBuilder.withLongOpt(longName);
    OptionBuilder.withDescription(description);

    if (argCount != 0) {
        OptionBuilder.withArgName(argName);
    }

    if (argCount == 1) {
        // Exactly one argument
        if (argOpt) {
            OptionBuilder.hasOptionalArg();
        } else {
            OptionBuilder.hasArg();
        }
    } else if (argCount > 1) {
        // A fixed number of arguments
        if (argOpt) {
            OptionBuilder.hasOptionalArgs(argCount);
        } else {
            OptionBuilder.hasArgs(argCount);
        }
    } else if (argCount < 0) {
        // No explicit number of arguments
        if (argOpt) {
            OptionBuilder.hasOptionalArgs();
        } else {
            OptionBuilder.hasArgs();
        }
    }

    final Option option;
    if (shortName == null) {
        option = OptionBuilder.create();
    } else {
        option = OptionBuilder.create(shortName);
    }
    options.add(option);
}
/**
 * Looks up the RDF format for a format string or file name by resolving the file name
 * obtained by prefixing "dummy." to the trimmed string.
 *
 * @param string the format string or file name, possibly null
 * @param fallback the value to return if the string is null, also passed as fallback to
 *            the lookup
 * @return the result of the lookup, or the fallback if the string is null
 */
private static RDFFormat detectRDFFormat(@Nullable final String string,
        final RDFFormat fallback) {
    if (string == null) {
        return fallback;
    }
    final String fileName = "dummy." + string.trim();
    return RDFFormat.forFileName(fileName, fallback);
}
/**
 * Looks up the compression for a format string or file name by resolving the file name
 * obtained by prefixing "dummy." to the trimmed string.
 *
 * @param string the format string or file name, possibly null
 * @param fallback the value to return if the string is null, also passed as fallback to
 *            the lookup
 * @return the result of the lookup, or the fallback if the string is null
 */
private static Compression detectCompression(@Nullable final String string,
        final Compression fallback) {
    if (string == null) {
        return fallback;
    }
    final String fileName = "dummy." + string.trim();
    return Compression.forFileName(fileName, fallback);
}
/**
 * Checks that the supplied file exists and is not a directory. A null file is accepted
 * without any check.
 *
 * @param file the file to check, possibly null
 * @throws IllegalArgumentException if the file does not exist or denotes a directory
 */
private static void checkFileExist(@Nullable final File file) {
    if (file != null) {
        if (!file.exists()) {
            throw new IllegalArgumentException("File '" + file + "' does not exist");
        }
        if (file.isDirectory()) {
            throw new IllegalArgumentException("Path '" + file + "' denotes a directory");
        }
    }
}
/**
 * Checks that an input can be parsed, i.e., that its RDF format is known. For a null file
 * (standing for STDIN) a format string must have been specified. For a regular file, the
 * file must exist and its RDF format must be derivable from the file name or the format
 * string; a warning is printed on STDERR when format or compression detected from the file
 * name differ from the ones denoted by the format string.
 *
 * @param file the input file, null for STDIN
 * @param formatString the format/compression string specified by the user, possibly null
 * @throws IllegalArgumentException if the input does not exist, is a directory or its RDF
 *             format cannot be determined
 */
private static void checkFileParseable(@Nullable final File file,
        @Nullable final String formatString) {

    // STDIN has no name: everything must come from the format string
    if (file == null) {
        if (formatString != null) {
            return;
        }
        throw new IllegalArgumentException("Cannot detect RDF format "
                + "and compression of STDIN: please specify option -s");
    }

    checkFileExist(file);

    // What the user declared (null if nothing was declared)
    final RDFFormat declaredFormat = detectRDFFormat(formatString, null);
    final Compression declaredCompression = detectCompression(formatString, null);

    // RDF format: at least one of file name and format string must provide it
    final RDFFormat nameFormat = RDFFormat.forFileName(file.getName());
    if (nameFormat == null) {
        if (declaredFormat == null) {
            throw new IllegalArgumentException("Unknown RDF format for file " + file);
        }
    } else if (declaredFormat != null && !nameFormat.equals(declaredFormat)) {
        System.err.println("Warning: detected RDF format for file " + file
                + " doesn't match specified format");
    }

    // Compression: only a mismatch with a declared compression is reported
    final Compression nameCompression = Compression.forFileName(file.getName(),
            Compression.NONE);
    final boolean mismatch = declaredCompression != null
            && !nameCompression.equals(declaredCompression);
    if (mismatch) {
        System.err.println("Warning: detected compression format for file " + file
                + " doesn't match specified format");
    }
}
/**
 * Resets the Logback context and reconfigures it from the "logback.disabled.xml" resource
 * of this class. A configuration failure is ignored on purpose: the context has already
 * been reset at that point.
 */
private static void disableLogging() {
    final LoggerContext context = (LoggerContext) LoggerFactory.getILoggerFactory();
    final JoranConfigurator configurator = new JoranConfigurator();
    configurator.setContext(context);
    try {
        // Drop the current configuration before loading the replacement
        context.reset();
        configurator.doConfigure(RDFPopulator.class.getResource("logback.disabled.xml"));
    } catch (final JoranException ignored) {
        // ignore
    }
}
/**
 * Handler that uploads the axioms it receives through merge operations on a session,
 * working in batches of at most {@code BUFFER_SIZE} records, and forwards to an error
 * handler the records of each batch for which no OK outcome was reported. A null record
 * marks the end of the sequence: pending records are uploaded and the null is forwarded.
 */
private static final class UploadHandler implements Handler<Record> {

    /** Number of buffered records that triggers an upload. */
    private static final int BUFFER_SIZE = 1024;

    private final Session session;

    private final Criteria criteria;

    /** Receives the records that were not uploaded, followed by a terminating null. */
    private final Handler<Record> errorHandler;

    /** Records of the current batch, indexed by ID. */
    private final Map<URI, Record> buffer;

    UploadHandler(final Session session, final Criteria criteria,
            final Handler<Record> errorHandler) {
        this.session = session;
        this.criteria = criteria;
        this.errorHandler = errorHandler;
        this.buffer = Maps.newHashMapWithExpectedSize(BUFFER_SIZE);
    }

    /**
     * Buffers the supplied axiom, uploading the batch when full; a null axiom triggers the
     * upload of pending records and is then propagated to the error handler.
     */
    @Override
    public void handle(final Record axiom) throws Throwable {
        if (axiom == null) {
            flush(true);
        } else {
            this.buffer.put(axiom.getID(), axiom);
            if (this.buffer.size() >= BUFFER_SIZE) {
                flush(false);
            }
        }
    }

    /**
     * Uploads the buffered records, forwards the ones not reported as OK to the error
     * handler and empties the buffer. An upload failure is logged and makes all the
     * records still in the buffer be forwarded to the error handler.
     *
     * @param done true if the end of the sequence was reached, in which case a null is
     *            forwarded to the error handler
     */
    private void flush(final boolean done) throws Throwable {
        if (!this.buffer.isEmpty()) {
            try {
                try {
                    // Upload a snapshot of the batch, as the outcome handler below removes
                    // entries from the buffer while the operation may still be running
                    final List<Record> batch = ImmutableList.copyOf(this.buffer.values());
                    final Operation.Merge operation = this.session.merge(KS.AXIOM)
                            .criteria(this.criteria).records(batch);
                    operation.exec(new Handler<Outcome>() {

                        @Override
                        public void handle(final Outcome outcome) throws Throwable {
                            if (outcome.getStatus().isOK()) {
                                UploadHandler.this.buffer.remove(outcome.getObjectID());
                            }
                        }

                    });
                } catch (final Throwable ex) {
                    MAIN_LOGGER.error("Upload failure: " + ex.getMessage(), ex);
                }
                // What is left in the buffer was not uploaded
                for (final Record record : this.buffer.values()) {
                    this.errorHandler.handle(record);
                }
            } finally {
                // Start the next batch from scratch: without this, rejected records would
                // be uploaded and reported again at each flush, and once BUFFER_SIZE of
                // them had piled up no further intermediate flush would be triggered
                this.buffer.clear();
            }
        }
        if (done) {
            this.errorHandler.handle(null);
        }
    }

}
}